bitreaper 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. checksums.yaml +4 -4
  2. data/lib/bitreaper.rb +27 -29
  3. metadata +114 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 806186133e8f475e8040fc7bee4e676d49665f83f16bcb127c171b7239e0aa94
4
- data.tar.gz: 693e9fbe65d0b4e697c9cf7eaa871ed905b35509b8a0ff7dd0953933c8b77635
3
+ metadata.gz: 8099d3d4a818b30b2ac24729cc564e0f30ca282fa0dd169961b4440f74e53d87
4
+ data.tar.gz: 507a81e5d915195358dc62c1cc8fddc86565778b97d02bf2cefe4951748d64f7
5
5
  SHA512:
6
- metadata.gz: 3080d190593d846cb6cc225c9e775e3899725a10e8785b7d506ba931d12203bdf76f7b0fe533f09b6b074761c1df9f0d46a80c513849377b1ec0cc8b49fdf4f6
7
- data.tar.gz: 84a2212ce1ab9c9ccd3a43cec0bcff53d3b6ff814d56bc833488668193b7e8ffc80b18732a07b0c5409fa392e645c8c7de5a58f2f052da3bcee772adffe26dbe
6
+ metadata.gz: a89b5f7e9c6e5bb7bbd019a3cab051a5ceea8f3cd5c195ab1a95c7a3c776279567180918ad3aebb4cd911d83d5ea80bc464e3661cb4314200433d3e5d483af7c
7
+ data.tar.gz: 07ad9cecd4e25faca388ce8e3fe05612a748189873f17acf21b07560099d1af8dff0acbc3ffe3556f99a1e6356c18a1b88fb54b7c7089fb9210112dcc127b7cd
data/lib/bitreaper.rb CHANGED
@@ -17,31 +17,24 @@ require 'liquid'
17
17
  require 'nokogiri'
18
18
  require 'open-uri'
19
19
  require 'sdl4r'
20
- require 'watir'
21
- require 'webdrivers'
22
20
 
23
21
  require_relative 'bitreaper/helpers.rb'
24
22
 
25
- ##########################################
26
- # SUPERGLOBALS
27
- ##########################################
23
+ $bitreaper_version = "0.1.3"
28
24
 
29
- $bitreaper_version = 0.1.2
30
-
31
- ####################################################################################
32
- # **MAIN CLASS**
25
+ ##
33
26
  # This is the main Web Scraper object. It is through a `BitReaper` instance
34
27
  # that you can start scraping
35
- ####################################################################################
36
28
 
37
29
  class BitReaper
38
30
 
31
+ ##
39
32
  # Create a new BitReaper instance
40
33
  #
41
34
  # @param [String] url The URL of the page to be scraped
42
35
  # @param [String,SDL4R::Tag] parser The parser
43
36
  # @param [Integer] i Index of the current operation (for reporting purposes)
44
- #---------------------------------------------------------------------------
37
+
45
38
  def initialize(url,parser,i=0)
46
39
  @url = url
47
40
  @parser = (parser.is_a? String) ? self.getParser(parser) : parser
@@ -52,12 +45,13 @@ class BitReaper
52
45
  @noko = self.download(@url)
53
46
  end
54
47
 
48
+ ##
55
49
  # Get a new parser from a given parser path
56
50
  #
57
51
  # @param [String] file The path of the `.br` parser file
58
52
  #
59
53
  # @return [SDL4R::Tag] The resulting parser
60
- #---------------------------------------------------------------------------
54
+
61
55
  def self.getParser(file)
62
56
  parserFile = File.read(file)
63
57
  parserFile = parserFile.gsub(/([\w]+)\!\s/,'\1=on')
@@ -69,30 +63,20 @@ class BitReaper
69
63
  return SDL4R::read(parserFile)
70
64
  end
71
65
 
72
- # Process current project
73
- #---------------------------------------------------------------------------
74
- def process
75
- printProgress(@url,@index,1)
76
- processNode(@noko, @parser, @store)
77
-
78
- printProgress(@url,@index,2)
79
- return @store
80
- end
81
-
82
- private
83
-
66
+ ##
84
67
  # Download given URL
85
68
  #
86
69
  # @param [String] url The URL to be downloaded
87
70
  #
88
71
  # @return [Nokogiri::HTML::Document] The parsed HTML document
89
- #---------------------------------------------------------------------------
72
+
90
73
  def download(url,withProgress=true)
91
74
  printProgress(@url,@index,0) if withProgress
92
75
 
93
76
  return Nokogiri::HTML(open(url))
94
77
  end
95
78
 
79
+ ##
96
80
  # Process String value using attribute
97
81
  #
98
82
  # @param [String] attrb The attribute to be processed
@@ -100,7 +84,7 @@ class BitReaper
100
84
  # @param [String] param The attribute's param (if any)
101
85
  #
102
86
  # @return [String,Array] The result of the operation
103
- #---------------------------------------------------------------------------
87
+
104
88
  def processStringValue(attrb,val,param)
105
89
  case attrb
106
90
  when "prepend"
@@ -128,6 +112,7 @@ class BitReaper
128
112
  return val
129
113
  end
130
114
 
115
+ ##
131
116
  # Process Array value using attribute
132
117
  #
133
118
  # @param [String] attrb The attribute to be processed
@@ -135,7 +120,7 @@ class BitReaper
135
120
  # @param [String] param The attribute's param (if any)
136
121
  #
137
122
  # @return [String,Array] The result of the operation
138
- #---------------------------------------------------------------------------
123
+
139
124
  def processArrayValue(attrb,val,param)
140
125
  case attrb
141
126
  when "join"
@@ -162,13 +147,14 @@ class BitReaper
162
147
  return val
163
148
  end
164
149
 
150
+ ##
165
151
  # Process parsed values using set of attributes
166
152
  #
167
153
  # @param [Array] values The parsed values
168
154
  # @param [Array] attrbs The associated attributes
169
155
  #
170
156
  # @return [String,Array] The result of the operation
171
- #---------------------------------------------------------------------------
157
+
172
158
  def processValues(values,attrbs)
173
159
  # check if we have a single value or an array of values
174
160
  ret = (values.count==1) ? values[0].content
@@ -197,13 +183,14 @@ class BitReaper
197
183
  return (ret.nil?) ? "" : ret
198
184
  end
199
185
 
186
+ ##
200
187
  # Process a given node using provided parser and temporary storage hash
201
188
  #
202
189
  # @param [Nokogiri::XML::Node] noko The Nokogiri node to work on
203
190
  # @param [SDL4R::Tag] node The parser node
204
191
  # @param [Hash] store The temporary storage hash
205
192
  # @param [Integer] level The nesting level (for informational purposes)
206
- #---------------------------------------------------------------------------
193
+
207
194
  def processNode(noko,node,store,level=0)
208
195
  node.children.each{|child|
209
196
  command = child.namespace
@@ -231,6 +218,17 @@ class BitReaper
231
218
  }
232
219
  end
233
220
 
221
+ ##
222
+ # Process current project
223
+
224
+ def process
225
+ printProgress(@url,@index,1)
226
+ processNode(@noko, @parser, @store)
227
+
228
+ printProgress(@url,@index,2)
229
+ return @store
230
+ end
231
+
234
232
  end
235
233
 
236
234
  #######################################################
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bitreaper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dr.Kameleon
@@ -9,7 +9,119 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
  date: 2020-04-09 00:00:00.000000000 Z
12
- dependencies: []
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: awesome_print
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: colorize
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: down
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: fileutils
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: json
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: liquid
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: nokogiri
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :runtime
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: sdl4r
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
13
125
  description: Automated Web-Scraping Client for Ruby using SLD2-like configuration
14
126
  files. Supports XPath and CSS selectors via Nokogiri.
15
127
  email: drkameleon@gmail.com