bitreaper 0.1.2 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +4 -4
  2. data/lib/bitreaper.rb +27 -29
  3. metadata +114 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 806186133e8f475e8040fc7bee4e676d49665f83f16bcb127c171b7239e0aa94
4
- data.tar.gz: 693e9fbe65d0b4e697c9cf7eaa871ed905b35509b8a0ff7dd0953933c8b77635
3
+ metadata.gz: 8099d3d4a818b30b2ac24729cc564e0f30ca282fa0dd169961b4440f74e53d87
4
+ data.tar.gz: 507a81e5d915195358dc62c1cc8fddc86565778b97d02bf2cefe4951748d64f7
5
5
  SHA512:
6
- metadata.gz: 3080d190593d846cb6cc225c9e775e3899725a10e8785b7d506ba931d12203bdf76f7b0fe533f09b6b074761c1df9f0d46a80c513849377b1ec0cc8b49fdf4f6
7
- data.tar.gz: 84a2212ce1ab9c9ccd3a43cec0bcff53d3b6ff814d56bc833488668193b7e8ffc80b18732a07b0c5409fa392e645c8c7de5a58f2f052da3bcee772adffe26dbe
6
+ metadata.gz: a89b5f7e9c6e5bb7bbd019a3cab051a5ceea8f3cd5c195ab1a95c7a3c776279567180918ad3aebb4cd911d83d5ea80bc464e3661cb4314200433d3e5d483af7c
7
+ data.tar.gz: 07ad9cecd4e25faca388ce8e3fe05612a748189873f17acf21b07560099d1af8dff0acbc3ffe3556f99a1e6356c18a1b88fb54b7c7089fb9210112dcc127b7cd
data/lib/bitreaper.rb CHANGED
@@ -17,31 +17,24 @@ require 'liquid'
17
17
  require 'nokogiri'
18
18
  require 'open-uri'
19
19
  require 'sdl4r'
20
- require 'watir'
21
- require 'webdrivers'
22
20
 
23
21
  require_relative 'bitreaper/helpers.rb'
24
22
 
25
- ##########################################
26
- # SUPERGLOBALS
27
- ##########################################
23
+ $bitreaper_version = "0.1.3"
28
24
 
29
- $bitreaper_version = 0.1.2
30
-
31
- ####################################################################################
32
- # **MAIN CLASS**
25
+ ##
33
26
  # This is the main Web Scraper object. It is through a `BitReaper` instance
34
27
  # that you can start scraping
35
- ####################################################################################
36
28
 
37
29
  class BitReaper
38
30
 
31
+ ##
39
32
  # Create a new BitReaper instance
40
33
  #
41
34
  # @param [String] url The URL of the page to be scraped
42
35
  # @param [String,SDL4R::Tag] parser The parser
43
36
  # @param [Integer] i Index of the current operation (for reporting purposes)
44
- #---------------------------------------------------------------------------
37
+
45
38
  def initialize(url,parser,i=0)
46
39
  @url = url
47
40
  @parser = (parser.is_a? String) ? self.getParser(parser) : parser
@@ -52,12 +45,13 @@ class BitReaper
52
45
  @noko = self.download(@url)
53
46
  end
54
47
 
48
+ ##
55
49
  # Get a new parser from a given parser path
56
50
  #
57
51
  # @param [String] file The path of the `.br` parser file
58
52
  #
59
53
  # @return [SDL4R::Tag] The resulting parser
60
- #---------------------------------------------------------------------------
54
+
61
55
  def self.getParser(file)
62
56
  parserFile = File.read(file)
63
57
  parserFile = parserFile.gsub(/([\w]+)\!\s/,'\1=on')
@@ -69,30 +63,20 @@ class BitReaper
69
63
  return SDL4R::read(parserFile)
70
64
  end
71
65
 
72
- # Process current project
73
- #---------------------------------------------------------------------------
74
- def process
75
- printProgress(@url,@index,1)
76
- processNode(@noko, @parser, @store)
77
-
78
- printProgress(@url,@index,2)
79
- return @store
80
- end
81
-
82
- private
83
-
66
+ ##
84
67
  # Download given URL
85
68
  #
86
69
  # @param [String] url The URL to be downloaded
87
70
  #
88
71
  # @return [Nokogiri::XML::NodeSet] The resulting nodes
89
- #---------------------------------------------------------------------------
72
+
90
73
  def download(url,withProgress=true)
91
74
  printProgress(@url,@index,0) if withProgress
92
75
 
93
76
  return Nokogiri::HTML(open(url))
94
77
  end
95
78
 
79
+ ##
96
80
  # Process String value using attribute
97
81
  #
98
82
  # @param [String] attrb The attribute to be processed
@@ -100,7 +84,7 @@ class BitReaper
100
84
  # @param [String] param The attribute's param (if any)
101
85
  #
102
86
  # @return [String,Array] The result of the operation
103
- #---------------------------------------------------------------------------
87
+
104
88
  def processStringValue(attrb,val,param)
105
89
  case attrb
106
90
  when "prepend"
@@ -128,6 +112,7 @@ class BitReaper
128
112
  return val
129
113
  end
130
114
 
115
+ ##
131
116
  # Process Array value using attribute
132
117
  #
133
118
  # @param [String] attrb The attribute to be processed
@@ -135,7 +120,7 @@ class BitReaper
135
120
  # @param [String] param The attribute's param (if any)
136
121
  #
137
122
  # @return [String,Array] The result of the operation
138
- #---------------------------------------------------------------------------
123
+
139
124
  def processArrayValue(attrb,val,param)
140
125
  case attrb
141
126
  when "join"
@@ -162,13 +147,14 @@ class BitReaper
162
147
  return val
163
148
  end
164
149
 
150
+ ##
165
151
  # Process parsed values using set of attributes
166
152
  #
167
153
  # @param [Array] values The parsed values
168
154
  # @param [Array] attrbs The associated attributes
169
155
  #
170
156
  # @return [String,Array] The result of the operation
171
- #---------------------------------------------------------------------------
157
+
172
158
  def processValues(values,attrbs)
173
159
  # check if we have a single value or an array of values
174
160
  ret = (values.count==1) ? values[0].content
@@ -197,13 +183,14 @@ class BitReaper
197
183
  return (ret.nil?) ? "" : ret
198
184
  end
199
185
 
186
+ ##
200
187
  # Process a given node using provided parser and temporary storage hash
201
188
  #
202
189
  # @param [Nokogiri::XML::Node] noko The Nokogiri node to work on
203
190
  # @param [SDL4R::Tag] node The parser node
204
191
  # @param [Hash] store The temporary storage hash
205
192
  # @param [Integer] level The nesting level (for informational purposes)
206
- #---------------------------------------------------------------------------
193
+
207
194
  def processNode(noko,node,store,level=0)
208
195
  node.children.each{|child|
209
196
  command = child.namespace
@@ -231,6 +218,17 @@ class BitReaper
231
218
  }
232
219
  end
233
220
 
221
+ ##
222
+ # Process current project
223
+
224
+ def process
225
+ printProgress(@url,@index,1)
226
+ processNode(@noko, @parser, @store)
227
+
228
+ printProgress(@url,@index,2)
229
+ return @store
230
+ end
231
+
234
232
  end
235
233
 
236
234
  #######################################################
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bitreaper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dr.Kameleon
@@ -9,7 +9,119 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
  date: 2020-04-09 00:00:00.000000000 Z
12
- dependencies: []
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: awesome_print
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: colorize
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: down
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: fileutils
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: json
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: liquid
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: nokogiri
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :runtime
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: sdl4r
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
13
125
  description: Automated Web-Scraping Client for Ruby using SDL-like configuration
14
126
  files. Supports XPath and CSS selectors via Nokogiri.
15
127
  email: drkameleon@gmail.com