gumtree_scraper 0.0.1 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +3 -3
- data/lib/gum/gum.rb +1 -1
- data/lib/gum/pages.rb +5 -2
- data/lib/gum/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2657037c9ffa4200cc4fba6693fc1a6ccdf4ade4
|
4
|
+
data.tar.gz: 393b0f81f385de12272a496f8b5383d5e48e35d7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ed75e5e9e9bd4b6eb31dc02724d701ef308d3846c0db9eed81a680d51a2ec81299abca64b57a5148a6254461dbe36e8ccb09f5c9e36d925bc175663ed122195e
|
7
|
+
data.tar.gz: 3a02bf158a85dcb797cb25c2a80d16f1afe21eaf64cd9c1fd9b86b8cba7352c87a79f36326da439289e50b4927a31b32df0f20552b820cb5848e8c0a17ceeb73
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -3,17 +3,17 @@ gumtree_scrape
|
|
3
3
|
|
4
4
|
Install with:
|
5
5
|
|
6
|
-
`gem install
|
6
|
+
`gem install gumtree_scraper`
|
7
7
|
|
8
8
|
Usage:
|
9
9
|
|
10
10
|
On terminal run:
|
11
11
|
|
12
|
-
`
|
12
|
+
`gumtree_scraper go`
|
13
13
|
|
14
14
|
If failing and IP has been blocked, there is a proxy built in. To use that, run:
|
15
15
|
|
16
|
-
`
|
16
|
+
`gumtree_scraper go --proxy`
|
17
17
|
|
18
18
|
At the moment only a single proxy is set up: http://anonymouse.org
|
19
19
|
|
data/lib/gum/gum.rb
CHANGED
data/lib/gum/pages.rb
CHANGED
@@ -47,6 +47,7 @@ module GUM
|
|
47
47
|
ask("Please select what type of search page you want to scrape:", :yellow, :limited_to =>GUM::Pages::LISTING_PAGES.keys)
|
48
48
|
end
|
49
49
|
|
50
|
+
|
50
51
|
# find the real values for each item inside the sub list
|
51
52
|
def get_values(section)
|
52
53
|
data = GUM::Pages::LISTING_PAGES[section]
|
@@ -59,6 +60,7 @@ module GUM
|
|
59
60
|
end
|
60
61
|
log("selected '#{values[k]}'")
|
61
62
|
end
|
63
|
+
@delay = ask("Enter delay time between calls: ", :yellow)
|
62
64
|
values
|
63
65
|
end
|
64
66
|
|
@@ -131,7 +133,8 @@ module GUM
|
|
131
133
|
log("found #{list_values[:item_links].length} adverts")
|
132
134
|
# merge together
|
133
135
|
adverts = adverts.concat(list_values[:item_links])
|
134
|
-
log("total length so far: #{adverts.length}")
|
136
|
+
log("total length so far: #{adverts.length}")
|
137
|
+
sleep @delay.to_f;
|
135
138
|
end
|
136
139
|
adverts
|
137
140
|
end
|
@@ -145,7 +148,7 @@ module GUM
|
|
145
148
|
file = self.url_to_file(url, @proxy)
|
146
149
|
values = self.load_without_output(file).values("advert").merge({url:url}) if ! file.nil?
|
147
150
|
data.push(values) if ! values[with].nil? && values[with].length > 0
|
148
|
-
sleep
|
151
|
+
sleep @delay.to_f;
|
149
152
|
end
|
150
153
|
data
|
151
154
|
end
|
data/lib/gum/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gumtree_scraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Charles Marshall
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-11-
|
11
|
+
date: 2014-11-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: docparser
|
@@ -72,7 +72,7 @@ files:
|
|
72
72
|
- lib/gum/gum.rb
|
73
73
|
- lib/gum/pages.rb
|
74
74
|
- lib/gum/version.rb
|
75
|
-
homepage: https://github.com/charlesmarshall/gumtree_scraper/tree/0.0.1
|
75
|
+
homepage: https://github.com/charlesmarshall/gumtree_scraper/tree/0.0.5
|
76
76
|
licenses:
|
77
77
|
- MIT
|
78
78
|
metadata: {}
|