storexplore 0.3.0 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/lib/storexplore/api.rb +10 -1
- data/lib/storexplore/dsl.rb +16 -0
- data/lib/storexplore/version.rb +1 -1
- data/lib/storexplore/walker_page.rb +5 -11
- data/spec/lib/storexplore/dsl_spec.rb +20 -0
- data/spec/lib/storexplore/walker_page_spec.rb +3 -2
- data.tar.gz.sig +0 -0
- metadata +2 -2
- metadata.gz.sig +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 10fe4669a7509cd1b7ff06f47fd12fc3269894ff
|
4
|
+
data.tar.gz: 2d3f18b217666c80219922f49de9488fad98b03a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 39223a9ac42ce39c6e35394723464b87a192235f92b9d1e398022426b92a5c8a93a0e1ea955003d1cef71edc9f117f7bfdc7a1062c729ee9de2f1be6cf04e8a1
|
7
|
+
data.tar.gz: 93e31befe06656e2ff8f71590b78b06780eaba677b963bc86adff83bc963480502a7441576dffda5a48a3453742f3f6ddd77dd96710696af3ab179f8fb81e240
|
checksums.yaml.gz.sig
CHANGED
Binary file
|
data/lib/storexplore/api.rb
CHANGED
@@ -19,6 +19,8 @@
|
|
19
19
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
20
20
|
# MA 02110-1301 USA
|
21
21
|
|
22
|
+
require 'mechanize'
|
23
|
+
|
22
24
|
module Storexplore
|
23
25
|
|
24
26
|
# Main entry point to the library
|
@@ -37,7 +39,14 @@ module Storexplore
|
|
37
39
|
# included in the url of the store.
|
38
40
|
# Returns a Storexplore::Walker for the home page of the store
|
39
41
|
def self.browse(store_url)
|
40
|
-
|
42
|
+
agent = Mechanize.new do |it|
|
43
|
+
# NOTE: by default Mechanize has infinite history, and causes memory leaks
|
44
|
+
#it.history.max_size = 0
|
45
|
+
end
|
46
|
+
|
47
|
+
builder = builder(store_url)
|
48
|
+
builder.configure_agent(agent)
|
49
|
+
builder.new_walker(WalkerPage.open(agent,store_url))
|
41
50
|
end
|
42
51
|
|
43
52
|
# Forgets the previously defined store API by its name. Mainly useful while
|
data/lib/storexplore/dsl.rb
CHANGED
@@ -36,11 +36,20 @@ module Storexplore
|
|
36
36
|
# Initializes a new instance with no special categories, items or
|
37
37
|
# attributes definition. (Internal usage)
|
38
38
|
def initialize
|
39
|
+
@configure_agent_block = lambda do |_| {} end
|
39
40
|
@scrap_attributes_block = lambda do |_| {} end
|
40
41
|
@categories_digger = NullDigger.new
|
41
42
|
@items_digger = NullDigger.new
|
42
43
|
end
|
43
44
|
|
45
|
+
# Registers a block that can customize the Mechanize Agent that will be
|
46
|
+
# used throughout the store digging. This can be useful to set up
|
47
|
+
# custom cookies for example. Ignored anywhere except on the top level
|
48
|
+
# store definition.
|
49
|
+
def agent(&block)
|
50
|
+
@configure_agent_block = block
|
51
|
+
end
|
52
|
+
|
44
53
|
# Registers the block to be used to extract attributes from a store page.
|
45
54
|
# Block will be evaluated within the context of a Storexplore::WalkerPage
|
46
55
|
def attributes(&block)
|
@@ -60,6 +69,13 @@ module Storexplore
|
|
60
69
|
@items_digger = Digger.new(selector, Dsl.walker_builder(&block))
|
61
70
|
end
|
62
71
|
|
72
|
+
# Initializes the mechanize agent with the given setup
|
73
|
+
# * agent : the mechanize agent that will be used throughout the walking
|
74
|
+
# (Internal usage)
|
75
|
+
def configure_agent(agent)
|
76
|
+
@configure_agent_block.call(agent)
|
77
|
+
end
|
78
|
+
|
63
79
|
# Initializes a new Storexplore::Walker instance based on specified custom
|
64
80
|
# definitions from the instance.
|
65
81
|
# * page_getter : proxy to the page that we want to explore
|
data/lib/storexplore/walker_page.rb
CHANGED
@@ -19,8 +19,6 @@
|
|
19
19
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
20
20
|
# MA 02110-1301 USA
|
21
21
|
|
22
|
-
require 'mechanize'
|
23
|
-
|
24
22
|
module Storexplore
|
25
23
|
|
26
24
|
# Wrapper around Mechanize::Page providing strict one liners to select
|
@@ -30,8 +28,8 @@ module Storexplore
|
|
30
28
|
|
31
29
|
# A new lazy proxy on the page.
|
32
30
|
# (Internal Usage)
|
33
|
-
def self.open(uri)
|
34
|
-
Getter.new(uri)
|
31
|
+
def self.open(agent, uri)
|
32
|
+
Getter.new(agent, uri)
|
35
33
|
end
|
36
34
|
|
37
35
|
# Uri of the page
|
@@ -99,7 +97,8 @@ module Storexplore
|
|
99
97
|
class Getter
|
100
98
|
attr_reader :uri
|
101
99
|
|
102
|
-
def initialize(uri)
|
100
|
+
def initialize(agent, uri)
|
101
|
+
@agent = agent
|
103
102
|
@uri = uri
|
104
103
|
end
|
105
104
|
def get
|
@@ -113,12 +112,7 @@ module Storexplore
|
|
113
112
|
private
|
114
113
|
|
115
114
|
def get_page
|
116
|
-
agent = Mechanize.new do |it|
|
117
|
-
# NOTE: by default Mechanize has infinite history, and causes memory leaks
|
118
|
-
it.history.max_size = 0
|
119
|
-
end
|
120
|
-
|
121
|
-
WalkerPage.new(agent.get(@uri))
|
115
|
+
WalkerPage.new(@agent.get(@uri))
|
122
116
|
end
|
123
117
|
end
|
124
118
|
|
@@ -23,6 +23,12 @@ require "spec_helper"
|
|
23
23
|
|
24
24
|
module Storexplore
|
25
25
|
|
26
|
+
class DslSpecAgent
|
27
|
+
class << self
|
28
|
+
attr_accessor :instance
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
26
32
|
describe Dsl do
|
27
33
|
|
28
34
|
def browse
|
@@ -34,6 +40,9 @@ module Storexplore
|
|
34
40
|
end
|
35
41
|
|
36
42
|
context 'a simple store' do
|
43
|
+
|
44
|
+
attr_accessor :actual_agent
|
45
|
+
|
37
46
|
before :each do
|
38
47
|
FakeWeb.register_uri(:get, "http://www.cats-surplus.com", content_type: 'text/html', body: <<-eos)
|
39
48
|
<html>
|
@@ -72,6 +81,10 @@ module Storexplore
|
|
72
81
|
FakeWeb.register_uri(:get, "http://www.cats-surplus.com/legal.html", content_type: 'text/html', body: "")
|
73
82
|
|
74
83
|
Storexplore::Api.define 'cats' do
|
84
|
+
agent do |it|
|
85
|
+
it.history.max_size = 1
|
86
|
+
DslSpecAgent.instance = it
|
87
|
+
end
|
75
88
|
items 'a.item' do
|
76
89
|
attributes do
|
77
90
|
{ page: page,
|
@@ -141,6 +154,12 @@ module Storexplore
|
|
141
154
|
expect(@walker.items.first.attributes[:page]).to be_instance_of(WalkerPage)
|
142
155
|
end
|
143
156
|
|
157
|
+
it "enables customization of the mechanize agent" do
|
158
|
+
@walker.items
|
159
|
+
|
160
|
+
expect(DslSpecAgent.instance.page.uri).to eq URI("http://www.cats-surplus.com")
|
161
|
+
end
|
162
|
+
|
144
163
|
|
145
164
|
context "when troubleshooting" do
|
146
165
|
|
@@ -211,5 +230,6 @@ module Storexplore
|
|
211
230
|
expect(@walker.attributes).to be_empty
|
212
231
|
end
|
213
232
|
end
|
233
|
+
|
214
234
|
end
|
215
235
|
end
|
@@ -20,6 +20,7 @@
|
|
20
20
|
# MA 02110-1301 USA
|
21
21
|
|
22
22
|
require 'spec_helper'
|
23
|
+
require 'mechanize'
|
23
24
|
|
24
25
|
module Storexplore
|
25
26
|
|
@@ -27,12 +28,12 @@ module Storexplore
|
|
27
28
|
|
28
29
|
before :each do
|
29
30
|
@uri = URI.parse("file://" + File.expand_path(File.join(File.dirname(__FILE__), 'store_walker_page_spec_fixture.html')))
|
30
|
-
@page_getter = WalkerPage.open(@uri)
|
31
|
+
@page_getter = WalkerPage.open(Mechanize.new, @uri)
|
31
32
|
end
|
32
33
|
|
33
34
|
context "before actually getting the page" do
|
34
35
|
it "nothing should throw if the uri is invalid" do
|
35
|
-
expect(lambda { WalkerPage.open("http://impossible.file.name") }).not_to raise_error
|
36
|
+
expect(lambda { WalkerPage.open(Mechanize.new, "http://impossible.file.name") }).not_to raise_error
|
36
37
|
end
|
37
38
|
|
38
39
|
it "knows the uri of the page" do
|
data.tar.gz.sig
CHANGED
Binary file
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: storexplore
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Philou
|
@@ -29,7 +29,7 @@ cert_chain:
|
|
29
29
|
LKjGZUoe+A3/FKYHWLPZAArCXrXLhpjAfopLBNmOAju0e30ObcjPQDWcj7z5jfJl
|
30
30
|
ILiFgo6hdHrp/tFaDv14PiMRm6sZaQ==
|
31
31
|
-----END CERTIFICATE-----
|
32
|
-
date: 2014-11-
|
32
|
+
date: 2014-11-05 00:00:00.000000000 Z
|
33
33
|
dependencies:
|
34
34
|
- !ruby/object:Gem::Dependency
|
35
35
|
name: mechanize
|
metadata.gz.sig
CHANGED
Binary file
|