extraloop 0.0.5 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,7 +4,7 @@ results = []
4
4
 
5
5
  ExtraLoop::IterativeScraper.new("https://www.google.com/search?tbm=nws&q=Egypt", :log => {
6
6
  :log_level => :debug,
7
- :appenders => [Logging.appenders.stderr ]
7
+ :appenders => [ Logging.appenders.stderr ]
8
8
 
9
9
  }).set_iteration(:start, (1..101).step(10)).
10
10
  loop_on("h3") { |nodes| nodes.map(&:parent) }.
@@ -23,7 +23,9 @@ options = {
23
23
  :log_level => :info
24
24
  }
25
25
  }
26
- request_arguments = { :params => params }
26
+ request_arguments = { :params => params, :headers => {
27
+ "User-Agent" => "ExtraLoop - ruby data extraction toolkit: http://github.com/afiore/extraloop"
28
+ }}
27
29
 
28
30
 
29
31
  #
@@ -39,9 +41,9 @@ ExtraLoop::IterativeScraper.new(api_url, options, request_arguments).
39
41
  extract(:ns).
40
42
  extract(:type).
41
43
  extract(:timestamp).
42
- on(:data, proc { |results|
44
+ on(:data) do |results|
43
45
  results.each { |record| all_results << record }
44
- }).
46
+ end.
45
47
  continue_with(:cmcontinue, ['query-continue', 'categorymembers', 'cmcontinue']).
46
48
  run()
47
49
 
@@ -29,7 +29,9 @@ class WikipediaCategoryScraper < ExtraLoop::IterativeScraper
29
29
  :format => :json,
30
30
  :log => false
31
31
  }
32
- request_arguments = { :params => params }
32
+ request_arguments = { :params => params, :headers => {
33
+ "User-Agent" => "ExtraLoop - ruby data extraction toolkit: http://github.com/afiore/extraloop"
34
+ }}
33
35
 
34
36
  super(@@api_url, options, request_arguments)
35
37
 
@@ -38,17 +40,16 @@ class WikipediaCategoryScraper < ExtraLoop::IterativeScraper
38
40
  extract(:ns).
39
41
  extract(:type).
40
42
  extract(:timestamp).
41
- on(:data, proc { |results|
43
+ on(:data) do |results|
42
44
  puts "#{"\t" * (@options[:depth] - 2).abs } #{@scraper.request_arguments[:params][:cmtitle]}"
43
45
  categories = results.select{ |record| record.ns === 14 }.each { |category| results.delete(category) }
44
46
 
45
-
46
47
  categories.each do |record|
47
48
  # Instantiate a sub-scraper if the current depth is greater than zero and the category member is a subcategory.
48
49
  WikipediaCategoryScraper.new(record.title, @options[:depth] - 1, @scraper.request_arguments[:params][:cmtitle] ).run unless @options[:depth] <= 0
49
50
  end
50
51
 
51
- }).
52
+ end.
52
53
  continue_with(:cmcontinue, ['query-continue', 'categorymembers', 'cmcontinue'])
53
54
  end
54
55
  end
@@ -89,8 +89,8 @@ module ExtraLoop
89
89
  # Returns itself.
90
90
  #
91
91
 
92
- def set_iteration(param, *args)
93
- #TODO: allow passing ranges as well as arrays
92
+ def set_iteration(param, *args, &block)
93
+ args << block if block
94
94
  if args.first.respond_to?(:map)
95
95
  @iteration_set = Array(args.first).map &:to_s
96
96
  else
@@ -110,7 +110,8 @@ module ExtraLoop
110
110
  #
111
111
  # Returns itself.
112
112
 
113
- def continue_with(param, *extractor_args)
113
+ def continue_with(param, *extractor_args, &block)
114
+ extractor_args << block if block
114
115
  raise Exceptions::NonGetAsyncRequestNotYetImplemented.new "the #continue_with method currently requires the 'async' option to be set to false" if @options[:async]
115
116
 
116
117
  @continue_clause_args = extractor_args
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: extraloop
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-01-28 00:00:00.000000000Z
12
+ date: 2012-01-30 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: yajl-ruby
16
- requirement: &19726880 !ruby/object:Gem::Requirement
16
+ requirement: &15579720 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: 1.1.0
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *19726880
24
+ version_requirements: *15579720
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: nokogiri
27
- requirement: &19726420 !ruby/object:Gem::Requirement
27
+ requirement: &15579260 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: 1.5.0
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *19726420
35
+ version_requirements: *15579260
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: typhoeus
38
- requirement: &19725960 !ruby/object:Gem::Requirement
38
+ requirement: &15578800 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: 0.3.2
44
44
  type: :runtime
45
45
  prerelease: false
46
- version_requirements: *19725960
46
+ version_requirements: *15578800
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: logging
49
- requirement: &19725500 !ruby/object:Gem::Requirement
49
+ requirement: &15578340 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ~>
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: 0.6.1
55
55
  type: :runtime
56
56
  prerelease: false
57
- version_requirements: *19725500
57
+ version_requirements: *15578340
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: rspec
60
- requirement: &19725040 !ruby/object:Gem::Requirement
60
+ requirement: &15577880 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ~>
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: 2.7.0
66
66
  type: :development
67
67
  prerelease: false
68
- version_requirements: *19725040
68
+ version_requirements: *15577880
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: rr
71
- requirement: &19724580 !ruby/object:Gem::Requirement
71
+ requirement: &15577420 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ~>
@@ -76,10 +76,10 @@ dependencies:
76
76
  version: 1.0.4
77
77
  type: :development
78
78
  prerelease: false
79
- version_requirements: *19724580
79
+ version_requirements: *15577420
80
80
  - !ruby/object:Gem::Dependency
81
81
  name: pry
82
- requirement: &19724060 !ruby/object:Gem::Requirement
82
+ requirement: &15576960 !ruby/object:Gem::Requirement
83
83
  none: false
84
84
  requirements:
85
85
  - - ~>
@@ -87,7 +87,7 @@ dependencies:
87
87
  version: 0.9.7.4
88
88
  type: :development
89
89
  prerelease: false
90
- version_requirements: *19724060
90
+ version_requirements: *15576960
91
91
  description: A Ruby library for extracting data from websites and web based APIs.
92
92
  Supports most common document formats (i.e. HTML, XML, and JSON), and comes with
93
93
  a handy mechanism for iterating over paginated datasets.