arb-crawler 1.0.0 → 1.0.1

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1edbf13a9ded81eb25cf49a3d8f42abf38d86f0f
4
- data.tar.gz: fef12e014d16904d3b5c7b99bdbfc30125985b22
3
+ metadata.gz: 11a51e3d1ff02b3c4c221f655a651a3c96266c93
4
+ data.tar.gz: a24e36d8a46bce45d9f56deaf2c30c821f5f0254
5
5
  SHA512:
6
- metadata.gz: 6938b1e9a8b5270dd7068e9d5c541548905c9ce63259d3b32b80b0a924da79e804c03f155915657d1f8d83eb0ecb25c11a9179397cf6fe53361e89dbc57b7def
7
- data.tar.gz: f603f483c106a317df770b49bd14ca6d08163ae73208cfb82b23aa58798a6f9647dc05b2ff257a3b4550f2689e11d34b0b695fa028b5fd08dc306bc581bfd428
6
+ metadata.gz: 63cc9d20c65e70f32efa22ec48bf1c8b7c76fa906a25254c0d7f7dfebd6b1c7135774d102ce6e16d294e9c580f543065fc7906e635690c26dceb60944c420823
7
+ data.tar.gz: eb551ccc891e231c710ee228995943a150fa69c292ed3126ebb64a79077028c08b2c8d2c71d3d123ba847b57b7b36db5b79d4962c65795bc47ff3440fd3fae99
@@ -1,5 +1,5 @@
1
1
  module Arb
2
2
  module Crawler
3
- VERSION = "1.0.0"
3
+ VERSION = "1.0.1"
4
4
  end
5
5
  end
data/lib/arb/crawler.rb CHANGED
@@ -15,6 +15,16 @@ module Arb
15
15
  client
16
16
  end
17
17
 
18
+ define_method :filter_str do |str, black_list=nil|
19
+ black_list||=%w{\ / : * ? < > |} << "\n"
20
+ black_list.each do |i|
21
+ loop do
22
+ break unless str.sub!(i,'')
23
+ end
24
+ end
25
+ str
26
+ end
27
+
18
28
  define_method :filename_of_url do |url|
19
29
  url && url[url.rindex('/')+1..-1]
20
30
  end
@@ -34,18 +44,24 @@ module Arb
34
44
  methods.each do |method|
35
45
  ways.each do |way|
36
46
  define_method "#{method}_by_#{way}_raw" do |url,css_or_xpath,&blk|
37
- ::Nokogiri.parse(client.send(method,url).body).send(way,css_or_xpath).tap do |res|
38
- if blk
39
- res.each do |e|
40
- blk[e]
47
+ begin
48
+ ::Nokogiri.parse(client.send(method,url).body).send(way,css_or_xpath).tap do |res|
49
+ if blk
50
+ res.each do |e|
51
+ blk[e]
52
+ end
41
53
  end
42
54
  end
55
+ rescue Exception=>e
56
+ $stderr.puts e
57
+ nil
43
58
  end
44
59
  end
45
60
 
46
61
  define_method "#{method}_by_#{way}" do |url,css_or_xpath,&blk|
47
62
  [].tap do |arr|
48
- send("#{method}_by_#{way}_raw",url,css_or_xpath).each do |nokogiri_element|
63
+ raw=send("#{method}_by_#{way}_raw",url,css_or_xpath)
64
+ raw && raw.each do |nokogiri_element|
49
65
  arr<<Hash.new.tap do |hash|
50
66
  nokogiri_element.attributes.keys.each do |key|
51
67
  hash[key.to_sym]=nokogiri_element.attribute(key).value
@@ -53,7 +69,7 @@ module Arb
53
69
  hash.singleton_class.send :define_method, :text do
54
70
  nokogiri_element.text
55
71
  end
56
- blk[e] if blk
72
+ blk[hash] if blk
57
73
  end
58
74
  end
59
75
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: arb-crawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - arybin
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-02-12 00:00:00.000000000 Z
11
+ date: 2017-02-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler