arb-crawler 1.0.2 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/arb/crawler.rb +6 -3
- data/lib/arb/crawler/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: df2e4dc94d0653f7567230ceb393b11e96602e2f
|
|
4
|
+
data.tar.gz: f3f9776483c4e385ff0c49070ffbc36f6c25852b
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: c859ad0dad974404bedbbb82d876fe9669cadbca818ef01a877cfea1d0172a82a9539b420c1008a2b0791fd54d9368de349084874ec21cc80865d25d02d47145
|
|
7
|
+
data.tar.gz: a3974808e0167d7ccd568e6e0ebd29a16a4bdd5f283344fedae7e74ae0312594625a3733d2b00fc90717886f63cae99eda97290a2960f2c32a88f68df6662541
|
data/lib/arb/crawler.rb
CHANGED
|
@@ -60,7 +60,8 @@ module Arb
|
|
|
60
60
|
end
|
|
61
61
|
end
|
|
62
62
|
|
|
63
|
-
|
|
63
|
+
#Make sure that the content_key distinct from those attribute keys
|
|
64
|
+
define_method "#{method}_by_#{way}" do |url,css_or_xpath,content_key=:text,&blk|
|
|
64
65
|
[].tap do |arr|
|
|
65
66
|
raw=send("#{method}_by_#{way}_raw",url,css_or_xpath)
|
|
66
67
|
raw && raw.each do |nokogiri_element|
|
|
@@ -68,8 +69,10 @@ module Arb
|
|
|
68
69
|
nokogiri_element.attributes.keys.each do |key|
|
|
69
70
|
hash[key.to_sym]=nokogiri_element.attribute(key).value
|
|
70
71
|
end
|
|
71
|
-
hash.
|
|
72
|
-
|
|
72
|
+
if hash.keys.include?(content_key.to_sym)
|
|
73
|
+
puts("Warning: Content key #{content_key} can not be used due to conflict!")
|
|
74
|
+
else
|
|
75
|
+
hash[content_key]=nokogiri_element.text
|
|
73
76
|
end
|
|
74
77
|
blk[hash] if blk
|
|
75
78
|
end
|
data/lib/arb/crawler/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: arb-crawler
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.0.2
|
|
4
|
+
version: 1.0.3
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- arybin
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2017-
|
|
11
|
+
date: 2017-08-06 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: bundler
|
|
@@ -115,7 +115,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
115
115
|
version: '0'
|
|
116
116
|
requirements: []
|
|
117
117
|
rubyforge_project:
|
|
118
|
-
rubygems_version: 2.
|
|
118
|
+
rubygems_version: 2.6.12
|
|
119
119
|
signing_key:
|
|
120
120
|
specification_version: 4
|
|
121
121
|
summary: Web page crawler.
|