storexplore 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/lib/storexplore/api.rb +10 -1
- data/lib/storexplore/dsl.rb +16 -0
- data/lib/storexplore/version.rb +1 -1
- data/lib/storexplore/walker_page.rb +5 -11
- data/spec/lib/storexplore/dsl_spec.rb +20 -0
- data/spec/lib/storexplore/walker_page_spec.rb +3 -2
- data.tar.gz.sig +0 -0
- metadata +2 -2
- metadata.gz.sig +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 10fe4669a7509cd1b7ff06f47fd12fc3269894ff
|
4
|
+
data.tar.gz: 2d3f18b217666c80219922f49de9488fad98b03a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 39223a9ac42ce39c6e35394723464b87a192235f92b9d1e398022426b92a5c8a93a0e1ea955003d1cef71edc9f117f7bfdc7a1062c729ee9de2f1be6cf04e8a1
|
7
|
+
data.tar.gz: 93e31befe06656e2ff8f71590b78b06780eaba677b963bc86adff83bc963480502a7441576dffda5a48a3453742f3f6ddd77dd96710696af3ab179f8fb81e240
|
checksums.yaml.gz.sig
CHANGED
Binary file
|
data/lib/storexplore/api.rb
CHANGED
@@ -19,6 +19,8 @@
|
|
19
19
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
20
20
|
# MA 02110-1301 USA
|
21
21
|
|
22
|
+
require 'mechanize'
|
23
|
+
|
22
24
|
module Storexplore
|
23
25
|
|
24
26
|
# Main entry point to the library
|
@@ -37,7 +39,14 @@ module Storexplore
|
|
37
39
|
# included in the url of the store.
|
38
40
|
# Returns a Storexplore::Walker for the home page of the store
|
39
41
|
def self.browse(store_url)
|
40
|
-
|
42
|
+
agent = Mechanize.new do |it|
|
43
|
+
# NOTE: by default Mechanize has infinite history, and causes memory leaks
|
44
|
+
#it.history.max_size = 0
|
45
|
+
end
|
46
|
+
|
47
|
+
builder = builder(store_url)
|
48
|
+
builder.configure_agent(agent)
|
49
|
+
builder.new_walker(WalkerPage.open(agent,store_url))
|
41
50
|
end
|
42
51
|
|
43
52
|
# Forgets the previously defined store API by its name. Mainly useful while
|
data/lib/storexplore/dsl.rb
CHANGED
@@ -36,11 +36,20 @@ module Storexplore
|
|
36
36
|
# Initializes a new instance with no special categories, items or
|
37
37
|
# attributes definition. (Internal usage)
|
38
38
|
def initialize
|
39
|
+
@configure_agent_block = lambda do |_| {} end
|
39
40
|
@scrap_attributes_block = lambda do |_| {} end
|
40
41
|
@categories_digger = NullDigger.new
|
41
42
|
@items_digger = NullDigger.new
|
42
43
|
end
|
43
44
|
|
45
|
+
# Registers a block that can customize the Mechanize Agent that will be
|
46
|
+
# used throughout the store digging. This can be useful to set up
|
47
|
+
# custom cookies for example. Ignored anywhere except on the top level
|
48
|
+
# store definition.
|
49
|
+
def agent(&block)
|
50
|
+
@configure_agent_block = block
|
51
|
+
end
|
52
|
+
|
44
53
|
# Registers the block to be used to extract attributes from a store page.
|
45
54
|
# Block will be evaluated within the context of a Storexplore::WalkerPage
|
46
55
|
def attributes(&block)
|
@@ -60,6 +69,13 @@ module Storexplore
|
|
60
69
|
@items_digger = Digger.new(selector, Dsl.walker_builder(&block))
|
61
70
|
end
|
62
71
|
|
72
|
+
# Initializes the mechanize agent with the given setup
|
73
|
+
# * agent : the mechanize agent that will be used throughout the walking
|
74
|
+
# (Internal usage)
|
75
|
+
def configure_agent(agent)
|
76
|
+
@configure_agent_block.call(agent)
|
77
|
+
end
|
78
|
+
|
63
79
|
# Initializes a new Storexplore::Walker instance based on specified custom
|
64
80
|
# definitions from the instance.
|
65
81
|
# * page_getter : proxy to the page that we want to explore
|
data/lib/storexplore/walker_page.rb
CHANGED
@@ -19,8 +19,6 @@
|
|
19
19
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
20
20
|
# MA 02110-1301 USA
|
21
21
|
|
22
|
-
require 'mechanize'
|
23
|
-
|
24
22
|
module Storexplore
|
25
23
|
|
26
24
|
# Wrapper around Mechanize::Page providing strict one liners to select
|
@@ -30,8 +28,8 @@ module Storexplore
|
|
30
28
|
|
31
29
|
# A new lazy proxy on the page.
|
32
30
|
# (Internal Usage)
|
33
|
-
def self.open(uri)
|
34
|
-
Getter.new(uri)
|
31
|
+
def self.open(agent, uri)
|
32
|
+
Getter.new(agent, uri)
|
35
33
|
end
|
36
34
|
|
37
35
|
# Uri of the page
|
@@ -99,7 +97,8 @@ module Storexplore
|
|
99
97
|
class Getter
|
100
98
|
attr_reader :uri
|
101
99
|
|
102
|
-
def initialize(uri)
|
100
|
+
def initialize(agent, uri)
|
101
|
+
@agent = agent
|
103
102
|
@uri = uri
|
104
103
|
end
|
105
104
|
def get
|
@@ -113,12 +112,7 @@ module Storexplore
|
|
113
112
|
private
|
114
113
|
|
115
114
|
def get_page
|
116
|
-
agent
|
117
|
-
# NOTE: by default Mechanize has infinite history, and causes memory leaks
|
118
|
-
it.history.max_size = 0
|
119
|
-
end
|
120
|
-
|
121
|
-
WalkerPage.new(agent.get(@uri))
|
115
|
+
WalkerPage.new(@agent.get(@uri))
|
122
116
|
end
|
123
117
|
end
|
124
118
|
|
@@ -23,6 +23,12 @@ require "spec_helper"
|
|
23
23
|
|
24
24
|
module Storexplore
|
25
25
|
|
26
|
+
class DslSpecAgent
|
27
|
+
class << self
|
28
|
+
attr_accessor :instance
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
26
32
|
describe Dsl do
|
27
33
|
|
28
34
|
def browse
|
@@ -34,6 +40,9 @@ module Storexplore
|
|
34
40
|
end
|
35
41
|
|
36
42
|
context 'a simple store' do
|
43
|
+
|
44
|
+
attr_accessor :actual_agent
|
45
|
+
|
37
46
|
before :each do
|
38
47
|
FakeWeb.register_uri(:get, "http://www.cats-surplus.com", content_type: 'text/html', body: <<-eos)
|
39
48
|
<html>
|
@@ -72,6 +81,10 @@ module Storexplore
|
|
72
81
|
FakeWeb.register_uri(:get, "http://www.cats-surplus.com/legal.html", content_type: 'text/html', body: "")
|
73
82
|
|
74
83
|
Storexplore::Api.define 'cats' do
|
84
|
+
agent do |it|
|
85
|
+
it.history.max_size = 1
|
86
|
+
DslSpecAgent.instance = it
|
87
|
+
end
|
75
88
|
items 'a.item' do
|
76
89
|
attributes do
|
77
90
|
{ page: page,
|
@@ -141,6 +154,12 @@ module Storexplore
|
|
141
154
|
expect(@walker.items.first.attributes[:page]).to be_instance_of(WalkerPage)
|
142
155
|
end
|
143
156
|
|
157
|
+
it "enables customization of the mechanize agent" do
|
158
|
+
@walker.items
|
159
|
+
|
160
|
+
expect(DslSpecAgent.instance.page.uri).to eq URI("http://www.cats-surplus.com")
|
161
|
+
end
|
162
|
+
|
144
163
|
|
145
164
|
context "when troubleshooting" do
|
146
165
|
|
@@ -211,5 +230,6 @@ module Storexplore
|
|
211
230
|
expect(@walker.attributes).to be_empty
|
212
231
|
end
|
213
232
|
end
|
233
|
+
|
214
234
|
end
|
215
235
|
end
|
@@ -20,6 +20,7 @@
|
|
20
20
|
# MA 02110-1301 USA
|
21
21
|
|
22
22
|
require 'spec_helper'
|
23
|
+
require 'mechanize'
|
23
24
|
|
24
25
|
module Storexplore
|
25
26
|
|
@@ -27,12 +28,12 @@ module Storexplore
|
|
27
28
|
|
28
29
|
before :each do
|
29
30
|
@uri = URI.parse("file://" + File.expand_path(File.join(File.dirname(__FILE__), 'store_walker_page_spec_fixture.html')))
|
30
|
-
@page_getter = WalkerPage.open(@uri)
|
31
|
+
@page_getter = WalkerPage.open(Mechanize.new, @uri)
|
31
32
|
end
|
32
33
|
|
33
34
|
context "before actually getting the page" do
|
34
35
|
it "nothing should throw if the uri is invalid" do
|
35
|
-
expect(lambda { WalkerPage.open("http://impossible.file.name") }).not_to raise_error
|
36
|
+
expect(lambda { WalkerPage.open(Mechanize.new, "http://impossible.file.name") }).not_to raise_error
|
36
37
|
end
|
37
38
|
|
38
39
|
it "knows the uri of the page" do
|
data.tar.gz.sig
CHANGED
Binary file
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: storexplore
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Philou
|
@@ -29,7 +29,7 @@ cert_chain:
|
|
29
29
|
LKjGZUoe+A3/FKYHWLPZAArCXrXLhpjAfopLBNmOAju0e30ObcjPQDWcj7z5jfJl
|
30
30
|
ILiFgo6hdHrp/tFaDv14PiMRm6sZaQ==
|
31
31
|
-----END CERTIFICATE-----
|
32
|
-
date: 2014-11-
|
32
|
+
date: 2014-11-05 00:00:00.000000000 Z
|
33
33
|
dependencies:
|
34
34
|
- !ruby/object:Gem::Dependency
|
35
35
|
name: mechanize
|
metadata.gz.sig
CHANGED
Binary file
|