horsefield 0.6.0 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/horsefield.gemspec +1 -1
- data/lib/horsefield/diggable.rb +14 -1
- data/lib/horsefield/version.rb +1 -1
- data/test/horsefield/test_scraper.rb +14 -2
- metadata +11 -12
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 361f824eedceebb4366e8d2a8041b15e9344424c801993059e435af201449442
|
4
|
+
data.tar.gz: 6bce024ad4503a6dbca19e3f5ca58ba6ae3cb4216c151ed3177e0eeb0cc3c028
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e966a5d390b4928ab479077fa31efd4a4df4471c8b1cb75fc4ea2e22054f6ae0792c01bc6033824022cea5d12d6e12094327e5cc4bb9046808364a896f17e19e
|
7
|
+
data.tar.gz: eee3aedbfa542d46573952aac4ea4a986901c2640bcc2fc7961d4de20fc1764f59d925df94139654479c2abc952758d24d11ddce36c8736d899486fa3b3fafcb
|
data/horsefield.gemspec
CHANGED
@@ -20,7 +20,7 @@ Gem::Specification.new do |spec|
|
|
20
20
|
spec.add_dependency 'nokogiri'
|
21
21
|
|
22
22
|
spec.add_development_dependency "bundler", "~> 1.7"
|
23
|
-
spec.add_development_dependency "rake", "
|
23
|
+
spec.add_development_dependency "rake", ">= 12.3.3"
|
24
24
|
spec.add_development_dependency "guard"
|
25
25
|
spec.add_development_dependency "guard-minitest"
|
26
26
|
spec.add_development_dependency "pry"
|
data/lib/horsefield/diggable.rb
CHANGED
@@ -24,7 +24,20 @@ module Horsefield
|
|
24
24
|
raise MissingSelectorError, "Couldn't find required selector (#{selector})" if lookup == :required && !doc
|
25
25
|
return fields if lookup == :presence && !doc
|
26
26
|
|
27
|
-
|
27
|
+
if block
|
28
|
+
# Process the sub-document
|
29
|
+
sub_doc = doc && doc.with_fresh_fields
|
30
|
+
|
31
|
+
# Run the block to populate fields and get its return value
|
32
|
+
return_value = sub_doc && sub_doc.instance_eval(&block)
|
33
|
+
|
34
|
+
# Use fields if they were populated, otherwise use the block's return value
|
35
|
+
value = (sub_doc && !sub_doc.fields.empty?) ? sub_doc.fields : return_value
|
36
|
+
|
37
|
+
fields.merge!(Hash[[[name, value]]])
|
38
|
+
else
|
39
|
+
fields.merge!(Hash[[[name, doc && doc.text.strip]]])
|
40
|
+
end
|
28
41
|
end
|
29
42
|
|
30
43
|
def many!(name, selector, &block)
|
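data/lib/horsefield/version.rb
CHANGED
[version bump 0.6.0 → 0.7.1; hunk missing from this extract]
data/test/horsefield/test_scraper.rb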
CHANGED
@@ -1,9 +1,19 @@
|
|
1
|
-
|
1
|
+
require_relative '../test_helper'
|
2
2
|
require 'pry'
|
3
3
|
|
4
4
|
class RedditScraper
|
5
5
|
include Horsefield::Scraper
|
6
6
|
|
7
|
+
one :meta do
|
8
|
+
[:keywords].each do |name|
|
9
|
+
one name, ".//meta[@name='#{name}']/@content"
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
one :static do
|
14
|
+
"test"
|
15
|
+
end
|
16
|
+
|
7
17
|
many :posts, '#siteTable .thing' do
|
8
18
|
one :title, 'a.title'
|
9
19
|
one :tagline, 'p.tagline' do
|
@@ -24,6 +34,8 @@ class TestScraper < Minitest::Test
|
|
24
34
|
|
25
35
|
def test_scraper
|
26
36
|
reddit = RedditScraper.new(@reddit_html).scrape
|
27
|
-
|
37
|
+
assert_equal "reddit, reddit.com, vote, comment, submit", reddit[:meta][:keywords]
|
38
|
+
assert_equal "Chris Pratt, homeless, living in this van, holding the script to his first acting job", reddit[:posts][0][:title]
|
39
|
+
assert_equal "test", reddit[:static]
|
28
40
|
end
|
29
41
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: horsefield
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.0
|
4
|
+
version: 0.7.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Erik Strömberg
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2025-05-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -42,16 +42,16 @@ dependencies:
|
|
42
42
|
name: rake
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
-
- - "
|
45
|
+
- - ">="
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version:
|
47
|
+
version: 12.3.3
|
48
48
|
type: :development
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
|
-
- - "
|
52
|
+
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version:
|
54
|
+
version: 12.3.3
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: guard
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -94,7 +94,7 @@ dependencies:
|
|
94
94
|
- - ">="
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '0'
|
97
|
-
description:
|
97
|
+
description:
|
98
98
|
email:
|
99
99
|
- erik.stromberg@gmail.com
|
100
100
|
executables: []
|
@@ -124,7 +124,7 @@ homepage: http://github.com/apa512/horsefield
|
|
124
124
|
licenses:
|
125
125
|
- MIT
|
126
126
|
metadata: {}
|
127
|
-
post_install_message:
|
127
|
+
post_install_message:
|
128
128
|
rdoc_options: []
|
129
129
|
require_paths:
|
130
130
|
- lib
|
@@ -139,9 +139,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
139
139
|
- !ruby/object:Gem::Version
|
140
140
|
version: '0'
|
141
141
|
requirements: []
|
142
|
-
|
143
|
-
|
144
|
-
signing_key:
|
142
|
+
rubygems_version: 3.5.22
|
143
|
+
signing_key:
|
145
144
|
specification_version: 4
|
146
145
|
summary: It's a scraper
|
147
146
|
test_files:
|