bitreaper 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8099d3d4a818b30b2ac24729cc564e0f30ca282fa0dd169961b4440f74e53d87
4
- data.tar.gz: 507a81e5d915195358dc62c1cc8fddc86565778b97d02bf2cefe4951748d64f7
3
+ metadata.gz: 6f76eb586fc7ef623380a46183b99b55dc84eb8a0e620b78798e279be916c880
4
+ data.tar.gz: d25938d1f72c42fa225816f98e66cdfbffcc15b3f829327d3b97203af9dbe06d
5
5
  SHA512:
6
- metadata.gz: a89b5f7e9c6e5bb7bbd019a3cab051a5ceea8f3cd5c195ab1a95c7a3c776279567180918ad3aebb4cd911d83d5ea80bc464e3661cb4314200433d3e5d483af7c
7
- data.tar.gz: 07ad9cecd4e25faca388ce8e3fe05612a748189873f17acf21b07560099d1af8dff0acbc3ffe3556f99a1e6356c18a1b88fb54b7c7089fb9210112dcc127b7cd
6
+ metadata.gz: 2dd848cd9aee975fc662785dd8417beca9afb855a09afe9fed8e9b44965ae023c2faaf4706e48f1fecfab153d7daa710de245aaca57bc65d43bfc9bde5cb32bb
7
+ data.tar.gz: 3cb64e5a593c9184232f1101303a4d370537e7167dbf052ffcf0c351d7ea8d38fe0b824c2691ec7b764fa1dbb2921529f9ab07900d75d93c2856a42b28465326
@@ -93,7 +93,7 @@ if $inputFile!=""
93
93
  }
94
94
  else
95
95
  if not $verbose
96
- Parallel.each_with_index($urls, in_threads: 4, progress: " ► Processing... "){|url,i|
96
+ Parallel.each_with_index($urls, in_threads: 6, progress: " ► Processing... "){|url,i|
97
97
  br = BitReaper.new(url,$parser,i)
98
98
  $store << br.process()
99
99
  }
@@ -20,7 +20,7 @@ require 'sdl4r'
20
20
 
21
21
  require_relative 'bitreaper/helpers.rb'
22
22
 
23
- $bitreaper_version = "0.1.3"
23
+ $bitreaper_version = "0.1.4"
24
24
 
25
25
  ##
26
26
  # This is the main Web Scraper object. It is through a `BitReaper` instance
@@ -54,7 +54,7 @@ class BitReaper
54
54
 
55
55
  def self.getParser(file)
56
56
  parserFile = File.read(file)
57
- parserFile = parserFile.gsub(/([\w]+)\!\s/,'\1=on')
57
+ parserFile = parserFile.gsub(/([\w]+)\!/,'\1=on')
58
58
  if $verbose
59
59
  puts parserFile.split("\n").map{|l| " "+l}.join("\n").light_black
60
60
  puts ""
@@ -73,7 +73,9 @@ class BitReaper
73
73
  def download(url,withProgress=true)
74
74
  printProgress(@url,@index,0) if withProgress
75
75
 
76
- return Nokogiri::HTML(open(url))
76
+ html = Nokogiri::HTML(open(url))
77
+
78
+ return html
77
79
  end
78
80
 
79
81
  ##
@@ -108,6 +110,8 @@ class BitReaper
108
110
  when "download"
109
111
  val = val
110
112
  val.downloadAs($outputDest,(param.is_a? String) ? param : nil)
113
+ when "exclude"
114
+ val = false
111
115
  end
112
116
  return val
113
117
  end
@@ -126,9 +130,9 @@ class BitReaper
126
130
  when "join"
127
131
  val = val.join(param)
128
132
  when "first"
129
- val = val.first
133
+ val = param==true ? val.first : val.first(param)
130
134
  when "last"
131
- val = val.last
135
+ val = param==true ? val.last : val.last(param)
132
136
  when "index"
133
137
  val = val[param.to_i]
134
138
  when "select.include"
@@ -143,10 +147,42 @@ class BitReaper
143
147
  else
144
148
  val = val.select{|r| r==param }
145
149
  end
150
+ when "exclude"
151
+ val = false
146
152
  end
147
153
  return val
148
154
  end
149
155
 
156
+ ##
157
+ # Process Hash value using attribute
158
+ #
159
+ # @param [String] attrb The attribute to be processed
160
+ # @param [Hash] val The value to be processed
161
+ # @param [String] param The attribute's param (if any)
162
+ #
163
+ # @return [String,Array] The result of the operation
164
+
165
+ def processHashValue(attrb,val,param)
166
+ case attrb
167
+ when "list"
168
+ val = squish(val)
169
+ # toret = []
170
+ # list = val.first[1]
171
+ # list.each_with_index{|l,i|
172
+ # dict = {}
173
+ # val.keys.each{|key|
174
+ # if val[key].is_a? Array
175
+ # if i<val[key].count
176
+ # dict[key] = val[key][i]
177
+ # end
178
+ # end
179
+ # }
180
+ # toret << dict
181
+ # }
182
+ # val = toret
183
+ end
184
+ end
185
+
150
186
  ##
151
187
  # Process parsed values using set of attributes
152
188
  #
@@ -157,8 +193,14 @@ class BitReaper
157
193
 
158
194
  def processValues(values,attrbs)
159
195
  # check if we have a single value or an array of values
160
- ret = (values.count==1) ? values[0].content
161
- : values.map{|v| v.content}
196
+ if values.is_a? Nokogiri::XML::NodeSet
197
+ # it is a nodeset, so let's extract the .content property
198
+ ret = (values.count==1) ? values[0].content
199
+ : values.map{|v| v.content}
200
+ else
201
+ # not a nodeset (perhaps a hash of values?)
202
+ ret = values
203
+ end
162
204
 
163
205
  # no attributes, just return it
164
206
  return ret if attrbs.size==0
@@ -168,15 +210,21 @@ class BitReaper
168
210
  # get params if we have multiple params; or not
169
211
  param = (arg.include? "||") ? (arg.split("||").map{|a| Liquid::Template.parse(a).render(@store) })
170
212
  : Liquid::Template.parse(arg).render(@store)
213
+ else
214
+ param = arg
171
215
  end
172
216
 
173
217
  if ret.is_a? String
174
218
  # if our value is a String, process it accordingly
175
219
  ret = self.processStringValue(attrb,ret,param)
176
- else
220
+ elsif ret.is_a? Array
177
221
  # it's an array of values, so look for array-operating attributes
178
222
  ret = self.processArrayValue(attrb,ret,param)
179
-
223
+ elsif ret.is_a? Hash
224
+ # it's a value hash, so process it accordingly
225
+ ret = self.processHashValue(attrb,ret,param)
226
+ else
227
+ ## Neither a String, an Array nor a Hash: leave the value unchanged
180
228
  end
181
229
  }
182
230
 
@@ -194,26 +242,41 @@ class BitReaper
194
242
  def processNode(noko,node,store,level=0)
195
243
  node.children.each{|child|
196
244
  command = child.namespace
197
- tag = child.name
245
+ tag = Liquid::Template.parse(child.name).render(@store)
198
246
  pattern = child.values[0]
199
247
  attrs = child.attributes
200
248
 
201
- if child.children.count==0
202
- # no children, so it's a "get"
203
- values = noko.search(pattern)
204
-
205
- if values.count>0
206
- store[tag] = self.processValues(values, attrs)
249
+ if not command==""
250
+ case tag
251
+ when "fetch"
252
+ gotoUrl = Liquid::Template.parse(pattern).render(@store)
253
+ br = BitReaper.new(gotoUrl,child)
254
+ store.merge! br.process()
207
255
  end
208
256
  else
209
- # it's a "section"
210
- store[tag] = {}
211
- if pattern.nil?
212
- subnoko = noko
257
+ if child.children.count==0
258
+ # no children, so it's a "get"
259
+ values = noko.search(pattern)
260
+
261
+ if values.count>0
262
+ processed = self.processValues(values,attrs)
263
+ if processed!=false
264
+ store[tag] = processed
265
+ end
266
+ end
213
267
  else
214
- subnoko = noko.search(pattern)
268
+ # it's a "section"
269
+ store[tag] = {}
270
+
271
+ if pattern.nil?
272
+ subnoko = noko
273
+ else
274
+ subnoko = noko.search(pattern)
275
+ end
276
+
277
+ processNode(subnoko,child,store[tag],level+1)
278
+ store[tag] = self.processValues(store[tag],attrs)
215
279
  end
216
- processNode(subnoko,child,store[tag],level+1)
217
280
  end
218
281
  }
219
282
  end
@@ -48,6 +48,16 @@ class String
48
48
  end
49
49
  end
50
50
 
51
+ def squish(ha)
52
+ h = ha
53
+ h.each{ |key,val|
54
+ if not val.nil? and val.is_a? Hash
55
+ h[key] = squish(val)
56
+ end
57
+ }
58
+ h.values.then { |a, *b| a.zip *b }.map { |e| (h.keys.zip e).to_h }
59
+ end
60
+
51
61
  ## Core
52
62
 
53
63
  def printLogo
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bitreaper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dr.Kameleon