arb-crawler 1.0.0 → 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1edbf13a9ded81eb25cf49a3d8f42abf38d86f0f
4
- data.tar.gz: fef12e014d16904d3b5c7b99bdbfc30125985b22
3
+ metadata.gz: 11a51e3d1ff02b3c4c221f655a651a3c96266c93
4
+ data.tar.gz: a24e36d8a46bce45d9f56deaf2c30c821f5f0254
5
5
  SHA512:
6
- metadata.gz: 6938b1e9a8b5270dd7068e9d5c541548905c9ce63259d3b32b80b0a924da79e804c03f155915657d1f8d83eb0ecb25c11a9179397cf6fe53361e89dbc57b7def
7
- data.tar.gz: f603f483c106a317df770b49bd14ca6d08163ae73208cfb82b23aa58798a6f9647dc05b2ff257a3b4550f2689e11d34b0b695fa028b5fd08dc306bc581bfd428
6
+ metadata.gz: 63cc9d20c65e70f32efa22ec48bf1c8b7c76fa906a25254c0d7f7dfebd6b1c7135774d102ce6e16d294e9c580f543065fc7906e635690c26dceb60944c420823
7
+ data.tar.gz: eb551ccc891e231c710ee228995943a150fa69c292ed3126ebb64a79077028c08b2c8d2c71d3d123ba847b57b7b36db5b79d4962c65795bc47ff3440fd3fae99
@@ -1,5 +1,5 @@
1
1
  module Arb
2
2
  module Crawler
3
- VERSION = "1.0.0"
3
+ VERSION = "1.0.1"
4
4
  end
5
5
  end
data/lib/arb/crawler.rb CHANGED
@@ -15,6 +15,16 @@ module Arb
15
15
  client
16
16
  end
17
17
 
18
+ define_method :filter_str do |str, black_list=nil|
19
+ black_list||=%w{\ / : * ? < > |} << "\n"
20
+ black_list.each do |i|
21
+ loop do
22
+ break unless str.sub!(i,'')
23
+ end
24
+ end
25
+ str
26
+ end
27
+
18
28
  define_method :filename_of_url do |url|
19
29
  url && url[url.rindex('/')+1..-1]
20
30
  end
@@ -34,18 +44,24 @@ module Arb
34
44
  methods.each do |method|
35
45
  ways.each do |way|
36
46
  define_method "#{method}_by_#{way}_raw" do |url,css_or_xpath,&blk|
37
- ::Nokogiri.parse(client.send(method,url).body).send(way,css_or_xpath).tap do |res|
38
- if blk
39
- res.each do |e|
40
- blk[e]
47
+ begin
48
+ ::Nokogiri.parse(client.send(method,url).body).send(way,css_or_xpath).tap do |res|
49
+ if blk
50
+ res.each do |e|
51
+ blk[e]
52
+ end
41
53
  end
42
54
  end
55
+ rescue Exception=>e
56
+ $stderr.puts e
57
+ nil
43
58
  end
44
59
  end
45
60
 
46
61
  define_method "#{method}_by_#{way}" do |url,css_or_xpath,&blk|
47
62
  [].tap do |arr|
48
- send("#{method}_by_#{way}_raw",url,css_or_xpath).each do |nokogiri_element|
63
+ raw=send("#{method}_by_#{way}_raw",url,css_or_xpath)
64
+ raw && raw.each do |nokogiri_element|
49
65
  arr<<Hash.new.tap do |hash|
50
66
  nokogiri_element.attributes.keys.each do |key|
51
67
  hash[key.to_sym]=nokogiri_element.attribute(key).value
@@ -53,7 +69,7 @@ module Arb
53
69
  hash.singleton_class.send :define_method, :text do
54
70
  nokogiri_element.text
55
71
  end
56
- blk[e] if blk
72
+ blk[hash] if blk
57
73
  end
58
74
  end
59
75
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: arb-crawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - arybin
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-02-12 00:00:00.000000000 Z
11
+ date: 2017-02-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler