storexplore 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 62ed201012136d3411fcb8d36033e956cbd257ee
4
- data.tar.gz: 5cabaaaf2165ec4513fa74114114328ac219b019
3
+ metadata.gz: 10fe4669a7509cd1b7ff06f47fd12fc3269894ff
4
+ data.tar.gz: 2d3f18b217666c80219922f49de9488fad98b03a
5
5
  SHA512:
6
- metadata.gz: bc4456ffb7df629729938ddd041491682758f8d14c1d31f396e7d5a4971da9c9d989c0a8d9d84bd8069b7a8a0b29c9acbc9c99cd8b97ec1935adfbaa216f9536
7
- data.tar.gz: b0cf18167eb27abee8ff815dae79e10f82904c085dec9ceb4ca1e3b99a42f01226f0c84f638b364bfac2588f077ae7fb9f29a9a6d702052279493225ed3cd71d
6
+ metadata.gz: 39223a9ac42ce39c6e35394723464b87a192235f92b9d1e398022426b92a5c8a93a0e1ea955003d1cef71edc9f117f7bfdc7a1062c729ee9de2f1be6cf04e8a1
7
+ data.tar.gz: 93e31befe06656e2ff8f71590b78b06780eaba677b963bc86adff83bc963480502a7441576dffda5a48a3453742f3f6ddd77dd96710696af3ab179f8fb81e240
checksums.yaml.gz.sig CHANGED
Binary file
@@ -19,6 +19,8 @@
19
19
  # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
20
20
  # MA 02110-1301 USA
21
21
 
22
+ require 'mechanize'
23
+
22
24
  module Storexplore
23
25
 
24
26
  # Main entry point to the library
@@ -37,7 +39,14 @@ module Storexplore
37
39
  # included in the url of the store.
38
40
  # Returns a Storexplore::Walker for the home page of the store
39
41
  def self.browse(store_url)
40
- builder(store_url).new_walker(WalkerPage.open(store_url))
42
+ agent = Mechanize.new do |it|
43
+ # NOTE: by default Mechanize has infinite history, and causes memory leaks
44
+ #it.history.max_size = 0
45
+ end
46
+
47
+ builder = builder(store_url)
48
+ builder.configure_agent(agent)
49
+ builder.new_walker(WalkerPage.open(agent,store_url))
41
50
  end
42
51
 
43
52
  # Forgets the previously defined store API by its name. Mainly useful while
@@ -36,11 +36,20 @@ module Storexplore
36
36
  # Initializes a new instance with no special categories, items or
37
37
  # attributes definition. (Internal usage)
38
38
  def initialize
39
+ @configure_agent_block = lambda do |_| {} end
39
40
  @scrap_attributes_block = lambda do |_| {} end
40
41
  @categories_digger = NullDigger.new
41
42
  @items_digger = NullDigger.new
42
43
  end
43
44
 
45
+ # Registers a block that can customize the Mechanize Agent that will be
46
+ # used throughout the store digging. This can be useful to set up
47
+ # custom cookies for example. Ignored anywhere except on the top level
48
+ # store definition.
49
+ def agent(&block)
50
+ @configure_agent_block = block
51
+ end
52
+
44
53
  # Registers the block to be used to extract attributes from a store page.
45
54
  # Block will be evaluated within the context of a Storexplore::WalkerPage
46
55
  def attributes(&block)
@@ -60,6 +69,13 @@ module Storexplore
60
69
  @items_digger = Digger.new(selector, Dsl.walker_builder(&block))
61
70
  end
62
71
 
72
+ # Initializes the mechanize agent with the given setup
73
+ # * agent : the mechanize agent that will be used throughout the walking
74
+ # (Internal usage)
75
+ def configure_agent(agent)
76
+ @configure_agent_block.call(agent)
77
+ end
78
+
63
79
  # Initializes a new Storexplore::Walker instance based on specified custom
64
80
  # definitions from the instance.
65
81
  # * page_getter : proxy to the page that we want to explore
@@ -20,5 +20,5 @@
20
20
  # MA 02110-1301 USA
21
21
 
22
22
  module Storexplore
23
- VERSION = "0.3.0"
23
+ VERSION = "0.4.0"
24
24
  end
@@ -19,8 +19,6 @@
19
19
  # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
20
20
  # MA 02110-1301 USA
21
21
 
22
- require 'mechanize'
23
-
24
22
  module Storexplore
25
23
 
26
24
  # Wrapper around Mechanize::Page providing strict one liners to select
@@ -30,8 +28,8 @@ module Storexplore
30
28
 
31
29
  # A new lazy proxy on the page.
32
30
  # (Internal Usage)
33
- def self.open(uri)
34
- Getter.new(uri)
31
+ def self.open(agent, uri)
32
+ Getter.new(agent, uri)
35
33
  end
36
34
 
37
35
  # Uri of the page
@@ -99,7 +97,8 @@ module Storexplore
99
97
  class Getter
100
98
  attr_reader :uri
101
99
 
102
- def initialize(uri)
100
+ def initialize(agent, uri)
101
+ @agent = agent
103
102
  @uri = uri
104
103
  end
105
104
  def get
@@ -113,12 +112,7 @@ module Storexplore
113
112
  private
114
113
 
115
114
  def get_page
116
- agent = Mechanize.new do |it|
117
- # NOTE: by default Mechanize has infinite history, and causes memory leaks
118
- it.history.max_size = 0
119
- end
120
-
121
- WalkerPage.new(agent.get(@uri))
115
+ WalkerPage.new(@agent.get(@uri))
122
116
  end
123
117
  end
124
118
 
@@ -23,6 +23,12 @@ require "spec_helper"
23
23
 
24
24
  module Storexplore
25
25
 
26
+ class DslSpecAgent
27
+ class << self
28
+ attr_accessor :instance
29
+ end
30
+ end
31
+
26
32
  describe Dsl do
27
33
 
28
34
  def browse
@@ -34,6 +40,9 @@ module Storexplore
34
40
  end
35
41
 
36
42
  context 'a simple store' do
43
+
44
+ attr_accessor :actual_agent
45
+
37
46
  before :each do
38
47
  FakeWeb.register_uri(:get, "http://www.cats-surplus.com", content_type: 'text/html', body: <<-eos)
39
48
  <html>
@@ -72,6 +81,10 @@ module Storexplore
72
81
  FakeWeb.register_uri(:get, "http://www.cats-surplus.com/legal.html", content_type: 'text/html', body: "")
73
82
 
74
83
  Storexplore::Api.define 'cats' do
84
+ agent do |it|
85
+ it.history.max_size = 1
86
+ DslSpecAgent.instance = it
87
+ end
75
88
  items 'a.item' do
76
89
  attributes do
77
90
  { page: page,
@@ -141,6 +154,12 @@ module Storexplore
141
154
  expect(@walker.items.first.attributes[:page]).to be_instance_of(WalkerPage)
142
155
  end
143
156
 
157
+ it "enables customization of the mechanize agent" do
158
+ @walker.items
159
+
160
+ expect(DslSpecAgent.instance.page.uri).to eq URI("http://www.cats-surplus.com")
161
+ end
162
+
144
163
 
145
164
  context "when troubleshooting" do
146
165
 
@@ -211,5 +230,6 @@ module Storexplore
211
230
  expect(@walker.attributes).to be_empty
212
231
  end
213
232
  end
233
+
214
234
  end
215
235
  end
@@ -20,6 +20,7 @@
20
20
  # MA 02110-1301 USA
21
21
 
22
22
  require 'spec_helper'
23
+ require 'mechanize'
23
24
 
24
25
  module Storexplore
25
26
 
@@ -27,12 +28,12 @@ module Storexplore
27
28
 
28
29
  before :each do
29
30
  @uri = URI.parse("file://" + File.expand_path(File.join(File.dirname(__FILE__), 'store_walker_page_spec_fixture.html')))
30
- @page_getter = WalkerPage.open(@uri)
31
+ @page_getter = WalkerPage.open(Mechanize.new, @uri)
31
32
  end
32
33
 
33
34
  context "before actually getting the page" do
34
35
  it "nothing should throw if the uri is invalid" do
35
- expect(lambda { WalkerPage.open("http://impossible.file.name") }).not_to raise_error
36
+ expect(lambda { WalkerPage.open(Mechanize.new, "http://impossible.file.name") }).not_to raise_error
36
37
  end
37
38
 
38
39
  it "knows the uri of the page" do
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: storexplore
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Philou
@@ -29,7 +29,7 @@ cert_chain:
29
29
  LKjGZUoe+A3/FKYHWLPZAArCXrXLhpjAfopLBNmOAju0e30ObcjPQDWcj7z5jfJl
30
30
  ILiFgo6hdHrp/tFaDv14PiMRm6sZaQ==
31
31
  -----END CERTIFICATE-----
32
- date: 2014-11-04 00:00:00.000000000 Z
32
+ date: 2014-11-05 00:00:00.000000000 Z
33
33
  dependencies:
34
34
  - !ruby/object:Gem::Dependency
35
35
  name: mechanize
metadata.gz.sig CHANGED
Binary file