arb-crawler 1.0.0 → 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/arb/crawler/version.rb +1 -1
- data/lib/arb/crawler.rb +22 -6
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 11a51e3d1ff02b3c4c221f655a651a3c96266c93
|
4
|
+
data.tar.gz: a24e36d8a46bce45d9f56deaf2c30c821f5f0254
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 63cc9d20c65e70f32efa22ec48bf1c8b7c76fa906a25254c0d7f7dfebd6b1c7135774d102ce6e16d294e9c580f543065fc7906e635690c26dceb60944c420823
|
7
|
+
data.tar.gz: eb551ccc891e231c710ee228995943a150fa69c292ed3126ebb64a79077028c08b2c8d2c71d3d123ba847b57b7b36db5b79d4962c65795bc47ff3440fd3fae99
|
data/lib/arb/crawler/version.rb
CHANGED
data/lib/arb/crawler.rb
CHANGED
@@ -15,6 +15,16 @@ module Arb
|
|
15
15
|
client
|
16
16
|
end
|
17
17
|
|
18
|
+
define_method :filter_str do |str, black_list=nil|
|
19
|
+
black_list||=%w{\ / : * ? < > |} << "\n"
|
20
|
+
black_list.each do |i|
|
21
|
+
loop do
|
22
|
+
break unless str.sub!(i,'')
|
23
|
+
end
|
24
|
+
end
|
25
|
+
str
|
26
|
+
end
|
27
|
+
|
18
28
|
define_method :filename_of_url do |url|
|
19
29
|
url && url[url.rindex('/')+1..-1]
|
20
30
|
end
|
@@ -34,18 +44,24 @@ module Arb
|
|
34
44
|
methods.each do |method|
|
35
45
|
ways.each do |way|
|
36
46
|
define_method "#{method}_by_#{way}_raw" do |url,css_or_xpath,&blk|
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
47
|
+
begin
|
48
|
+
::Nokogiri.parse(client.send(method,url).body).send(way,css_or_xpath).tap do |res|
|
49
|
+
if blk
|
50
|
+
res.each do |e|
|
51
|
+
blk[e]
|
52
|
+
end
|
41
53
|
end
|
42
54
|
end
|
55
|
+
rescue Exception=>e
|
56
|
+
$stderr.puts e
|
57
|
+
nil
|
43
58
|
end
|
44
59
|
end
|
45
60
|
|
46
61
|
define_method "#{method}_by_#{way}" do |url,css_or_xpath,&blk|
|
47
62
|
[].tap do |arr|
|
48
|
-
send("#{method}_by_#{way}_raw",url,css_or_xpath)
|
63
|
+
raw=send("#{method}_by_#{way}_raw",url,css_or_xpath)
|
64
|
+
raw && raw.each do |nokogiri_element|
|
49
65
|
arr<<Hash.new.tap do |hash|
|
50
66
|
nokogiri_element.attributes.keys.each do |key|
|
51
67
|
hash[key.to_sym]=nokogiri_element.attribute(key).value
|
@@ -53,7 +69,7 @@ module Arb
|
|
53
69
|
hash.singleton_class.send :define_method, :text do
|
54
70
|
nokogiri_element.text
|
55
71
|
end
|
56
|
-
blk[
|
72
|
+
blk[hash] if blk
|
57
73
|
end
|
58
74
|
end
|
59
75
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: arb-crawler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
4
|
+
version: 1.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- arybin
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-02-
|
11
|
+
date: 2017-02-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|