scraping 0.1.0 → 0.2.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b9aff8bf11dbde49763faa84c830518cb308daab
4
- data.tar.gz: ba701a4490c085c2278ff93efbdeb244a9b74b12
3
+ metadata.gz: d078337c4224a96b2587427a06556877d69eb656
4
+ data.tar.gz: dc8bdf3cc7a53568c99d869dfdda769c17c79d7b
5
5
  SHA512:
6
- metadata.gz: b6bc072d8df959e32b8cd5230db684ebffdb08d23ce3a2633a683042eed6f92a5f0c8e61b4c35ecd570975461d417a93f6941b1688d328eafb76cf958f247627
7
- data.tar.gz: 0149081ebc7cd6073e2a178f10c1904e46a3e3cab8e2fa9f35fc82387b1a7938d4f48428a9eb58d1adc835680c5dc0248b76bd562fb7b1e8288cc24efca46154
6
+ metadata.gz: ca76e4ac712fc15fea39a3f7d76298bf20e651410ec8a515a63345d4563740e2f59caede6f6b0a314cbb7335c7a72888c336ef0ef18bd32ee10bfe7fc21da921
7
+ data.tar.gz: c42df98a5a21a72b6cb7397dd34a099ec40ef36567c3f7a7b76dfcb3a4a864c8fe034276dd6ecf53d347e442292e939471c43ac0704d4080c302062f40dfdcb9
data/README.md CHANGED
@@ -35,15 +35,16 @@ You can also scrape arrays, objects, and arrays of objects. `elements` and `elem
35
35
  ```ruby
36
36
  class YouCan
37
37
  include Scraping
38
+
38
39
  elements :scrape, '.scrape'
39
40
 
40
- elements :also_scrape, '.also-scrape li' do
41
+ sections :also_scrape, '.also-scrape li' do
41
42
  element :name, 'a'
42
43
  element :link, 'a/@href'
43
44
  elements :numbers, 'span'
44
45
  end
45
46
 
46
- elements_of :nested_scrape do
47
+ section :nested_scrape do
47
48
  element :data, '.data'
48
49
  end
49
50
  end
@@ -89,6 +90,10 @@ class Advanced
89
90
 
90
91
  element :birthday, '.birthday', as: :date
91
92
 
93
+ elements :numbers, 'span' do |node|
94
+ node.text.to_i * 10
95
+ end
96
+
92
97
  private
93
98
 
94
99
  def extract_date(node)
@@ -99,15 +104,39 @@ end
99
104
  advanced = Advanced.new(<<-EOF)
100
105
  <h1 class="name">Millard Fillmore</h1>
101
106
  <h2 class="birthday">7-1-1800</h2>
107
+ <span>1</span>
108
+ <span>2</span>
102
109
  EOF
103
110
 
104
111
  advanced.first_name #=> 'Millard'
105
112
  advanced.birthday #=> #<Date: 1800-01-07>
113
+ advanced.numbers #=> [10, 20]
114
+ ```
115
+
116
+ ## HTTP
117
+
118
+ Scraping is totally agnostic of HTTP, but if you need a suggestion, check out [HTTParty](https://github.com/jnunemaker/httparty).
119
+
120
+ ```ruby
121
+ class HackerNews
122
+ include HTTParty
123
+ include Scraping
124
+
125
+ base_uri 'https://news.ycombinator.com'
126
+ elements :stories, '.athing .title > a'
127
+
128
+ def self.scrape
129
+ super get('/').body
130
+ end
131
+ end
132
+
133
+ news = HackerNews.scrape
134
+ puts news.stories.inspect
106
135
  ```
107
136
 
108
137
  ## Contributing
109
138
 
110
- Bug reports and pull requests are welcome on GitHub at https://github.com/[USERNAME]/scraping.
139
+ Bug reports and pull requests are welcome on GitHub at https://github.com/promptworks/scraping.
111
140
 
112
141
  ## License
113
142
 
@@ -2,8 +2,9 @@ require 'nokogiri'
2
2
  require 'scraping/version'
3
3
  require 'scraping/dsl'
4
4
  require 'scraping/rules/element'
5
- require 'scraping/rules/elements_of'
6
5
  require 'scraping/rules/elements'
6
+ require 'scraping/rules/section'
7
+ require 'scraping/rules/sections'
7
8
 
8
9
  module Scraping
9
10
  def self.included(base)
@@ -38,12 +39,17 @@ module Scraping
38
39
  super
39
40
  end
40
41
 
41
- def elements_of(name)
42
+ def elements(name, *)
42
43
  attr_accessor name
43
44
  super
44
45
  end
45
46
 
46
- def elements(name, *)
47
+ def section(name, *)
48
+ attr_accessor name
49
+ super
50
+ end
51
+
52
+ def sections(name, *)
47
53
  attr_accessor name
48
54
  super
49
55
  end
@@ -8,12 +8,16 @@ module Scraping
8
8
  rules[name] = Rules::Element.new(name, selector, options, &block)
9
9
  end
10
10
 
11
- def elements_of(name, &block)
12
- rules[name] = Rules::ElementsOf.new(name).evaluate(&block)
11
+ def elements(name, selector, options = {}, &block)
12
+ rules[name] = Rules::Elements.new(name, selector, options, &block)
13
13
  end
14
14
 
15
- def elements(name, selector, options = {}, &block)
16
- rules[name] = Rules::Elements.new(name, selector, options).evaluate(&block)
15
+ def section(name, selector = '.', &block)
16
+ rules[name] = Rules::Section.new(name, selector).evaluate(&block)
17
+ end
18
+
19
+ def sections(name, selector, &block)
20
+ rules[name] = Rules::Sections.new(name, selector).evaluate(&block)
17
21
  end
18
22
  end
19
23
  end
@@ -1,27 +1,18 @@
1
+ require 'scraping/rules/element'
2
+
1
3
  module Scraping
2
4
  module Rules
3
- class Elements
4
- attr_reader :name, :selector, :rule, :options
5
-
6
- def initialize(name, selector, options = {})
7
- @name = name
8
- @selector = selector
9
- @options = options
10
- end
11
-
12
- def evaluate(&block)
13
- if block_given?
14
- @rule = ElementsOf.new(name).evaluate(&block)
15
- else
16
- @rule = Element.new(name, '.', options)
17
- end
5
+ class Elements < Element
6
+ attr_reader :multiselector
18
7
 
19
- self
8
+ def initialize(name, selector, options = {}, &extract)
9
+ super(name, '.', options, &extract)
10
+ @multiselector = selector
20
11
  end
21
12
 
22
13
  def call(scraper, node)
23
- node.search(selector).map do |item|
24
- rule.call(scraper, item)
14
+ node.search(multiselector).map do |item|
15
+ super scraper, item
25
16
  end
26
17
  end
27
18
  end
@@ -2,12 +2,13 @@ require 'ostruct'
2
2
 
3
3
  module Scraping
4
4
  module Rules
5
- class ElementsOf
5
+ class Section
6
6
  include DSL
7
- attr_reader :name
7
+ attr_reader :name, :selector
8
8
 
9
- def initialize(name)
9
+ def initialize(name, selector = '.')
10
10
  @name = name
11
+ @selector = selector
11
12
  end
12
13
 
13
14
  def evaluate(&block)
@@ -17,7 +18,7 @@ module Scraping
17
18
 
18
19
  def call(scraper, node)
19
20
  rules.inject(OpenStruct.new) do |obj, (name, rule)|
20
- obj[name] = rule.call(scraper, node)
21
+ obj[name] = rule.call scraper, node.at(selector)
21
22
  obj
22
23
  end
23
24
  end
@@ -0,0 +1,20 @@
1
+ require 'scraping/rules/section'
2
+
3
+ module Scraping
4
+ module Rules
5
+ class Sections < Section
6
+ attr_reader :multiselector
7
+
8
+ def initialize(name, selector)
9
+ super name, '.'
10
+ @multiselector = selector
11
+ end
12
+
13
+ def call(scraper, node)
14
+ node.search(multiselector).map do |item|
15
+ super scraper, item
16
+ end
17
+ end
18
+ end
19
+ end
20
+ end
@@ -1,3 +1,3 @@
1
1
  module Scraping
2
- VERSION = "0.1.0"
2
+ VERSION = "0.2.0"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: scraping
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ray Zane
@@ -83,7 +83,8 @@ files:
83
83
  - lib/scraping/dsl.rb
84
84
  - lib/scraping/rules/element.rb
85
85
  - lib/scraping/rules/elements.rb
86
- - lib/scraping/rules/elements_of.rb
86
+ - lib/scraping/rules/section.rb
87
+ - lib/scraping/rules/sections.rb
87
88
  - lib/scraping/version.rb
88
89
  - scraping.gemspec
89
90
  homepage: https://github.com/rzane/scraping