bitreaper 0.1.3 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8099d3d4a818b30b2ac24729cc564e0f30ca282fa0dd169961b4440f74e53d87
4
- data.tar.gz: 507a81e5d915195358dc62c1cc8fddc86565778b97d02bf2cefe4951748d64f7
3
+ metadata.gz: 6f76eb586fc7ef623380a46183b99b55dc84eb8a0e620b78798e279be916c880
4
+ data.tar.gz: d25938d1f72c42fa225816f98e66cdfbffcc15b3f829327d3b97203af9dbe06d
5
5
  SHA512:
6
- metadata.gz: a89b5f7e9c6e5bb7bbd019a3cab051a5ceea8f3cd5c195ab1a95c7a3c776279567180918ad3aebb4cd911d83d5ea80bc464e3661cb4314200433d3e5d483af7c
7
- data.tar.gz: 07ad9cecd4e25faca388ce8e3fe05612a748189873f17acf21b07560099d1af8dff0acbc3ffe3556f99a1e6356c18a1b88fb54b7c7089fb9210112dcc127b7cd
6
+ metadata.gz: 2dd848cd9aee975fc662785dd8417beca9afb855a09afe9fed8e9b44965ae023c2faaf4706e48f1fecfab153d7daa710de245aaca57bc65d43bfc9bde5cb32bb
7
+ data.tar.gz: 3cb64e5a593c9184232f1101303a4d370537e7167dbf052ffcf0c351d7ea8d38fe0b824c2691ec7b764fa1dbb2921529f9ab07900d75d93c2856a42b28465326
@@ -93,7 +93,7 @@ if $inputFile!=""
93
93
  }
94
94
  else
95
95
  if not $verbose
96
- Parallel.each_with_index($urls, in_threads: 4, progress: " ► Processing... "){|url,i|
96
+ Parallel.each_with_index($urls, in_threads: 6, progress: " ► Processing... "){|url,i|
97
97
  br = BitReaper.new(url,$parser,i)
98
98
  $store << br.process()
99
99
  }
@@ -20,7 +20,7 @@ require 'sdl4r'
20
20
 
21
21
  require_relative 'bitreaper/helpers.rb'
22
22
 
23
- $bitreaper_version = "0.1.3"
23
+ $bitreaper_version = "0.1.4"
24
24
 
25
25
  ##
26
26
  # This is the main Web Scraper object. It is through a `BitReaper` instance
@@ -54,7 +54,7 @@ class BitReaper
54
54
 
55
55
  def self.getParser(file)
56
56
  parserFile = File.read(file)
57
- parserFile = parserFile.gsub(/([\w]+)\!\s/,'\1=on')
57
+ parserFile = parserFile.gsub(/([\w]+)\!/,'\1=on')
58
58
  if $verbose
59
59
  puts parserFile.split("\n").map{|l| " "+l}.join("\n").light_black
60
60
  puts ""
@@ -73,7 +73,9 @@ class BitReaper
73
73
  def download(url,withProgress=true)
74
74
  printProgress(@url,@index,0) if withProgress
75
75
 
76
- return Nokogiri::HTML(open(url))
76
+ html = Nokogiri::HTML(open(url))
77
+
78
+ return html
77
79
  end
78
80
 
79
81
  ##
@@ -108,6 +110,8 @@ class BitReaper
108
110
  when "download"
109
111
  val = val
110
112
  val.downloadAs($outputDest,(param.is_a? String) ? param : nil)
113
+ when "exclude"
114
+ val = false
111
115
  end
112
116
  return val
113
117
  end
@@ -126,9 +130,9 @@ class BitReaper
126
130
  when "join"
127
131
  val = val.join(param)
128
132
  when "first"
129
- val = val.first
133
+ val = param==true ? val.first : val.first(param)
130
134
  when "last"
131
- val = val.last
135
+ val = param==true ? val.last : val.last(param)
132
136
  when "index"
133
137
  val = val[param.to_i]
134
138
  when "select.include"
@@ -143,10 +147,42 @@ class BitReaper
143
147
  else
144
148
  val = val.select{|r| r==param }
145
149
  end
150
+ when "exclude"
151
+ val = false
146
152
  end
147
153
  return val
148
154
  end
149
155
 
156
+ ##
157
+ # Process Hash value using attribute
158
+ #
159
+ # @param [String] attrb The attribute to be processed
160
+ # @param [Hash] val The value to be processed
161
+ # @param [String] param The attribute's param (if any)
162
+ #
163
+ # @return [String,Array] The result of the operation
164
+
165
+ def processHashValue(attrb,val,param)
166
+ case attrb
167
+ when "list"
168
+ val = squish(val)
169
+ # toret = []
170
+ # list = val.first[1]
171
+ # list.each_with_index{|l,i|
172
+ # dict = {}
173
+ # val.keys.each{|key|
174
+ # if val[key].is_a? Array
175
+ # if i<val[key].count
176
+ # dict[key] = val[key][i]
177
+ # end
178
+ # end
179
+ # }
180
+ # toret << dict
181
+ # }
182
+ # val = toret
183
+ end
184
+ end
185
+
150
186
  ##
151
187
  # Process parsed values using set of attributes
152
188
  #
@@ -157,8 +193,14 @@ class BitReaper
157
193
 
158
194
  def processValues(values,attrbs)
159
195
  # check if we have a single value or an array of values
160
- ret = (values.count==1) ? values[0].content
161
- : values.map{|v| v.content}
196
+ if values.is_a? Nokogiri::XML::NodeSet
197
+ # it is a nodeset, so let's extract the .content property
198
+ ret = (values.count==1) ? values[0].content
199
+ : values.map{|v| v.content}
200
+ else
201
+ # not a nodeset (perhaps a hash of values?)
202
+ ret = values
203
+ end
162
204
 
163
205
  # no attributes, just return it
164
206
  return ret if attrbs.size==0
@@ -168,15 +210,21 @@ class BitReaper
168
210
  # get params if we have multiple params; or not
169
211
  param = (arg.include? "||") ? (arg.split("||").map{|a| Liquid::Template.parse(a).render(@store) })
170
212
  : Liquid::Template.parse(arg).render(@store)
213
+ else
214
+ param = arg
171
215
  end
172
216
 
173
217
  if ret.is_a? String
174
218
  # if our value is a String, process it accordingly
175
219
  ret = self.processStringValue(attrb,ret,param)
176
- else
220
+ elsif ret.is_a? Array
177
221
  # it's an array of values, so look for array-operating attributes
178
222
  ret = self.processArrayValue(attrb,ret,param)
179
-
223
+ elsif ret.is_a? Hash
224
+ # it's a value hash, so process it accordingly
225
+ ret = self.processHashValue(attrb,ret,param)
226
+ else
227
+ ## Neither a String, an Array nor a Hash: leave the value untouched
180
228
  end
181
229
  }
182
230
 
@@ -194,26 +242,41 @@ class BitReaper
194
242
  def processNode(noko,node,store,level=0)
195
243
  node.children.each{|child|
196
244
  command = child.namespace
197
- tag = child.name
245
+ tag = Liquid::Template.parse(child.name).render(@store)
198
246
  pattern = child.values[0]
199
247
  attrs = child.attributes
200
248
 
201
- if child.children.count==0
202
- # no children, so it's a "get"
203
- values = noko.search(pattern)
204
-
205
- if values.count>0
206
- store[tag] = self.processValues(values, attrs)
249
+ if not command==""
250
+ case tag
251
+ when "fetch"
252
+ gotoUrl = Liquid::Template.parse(pattern).render(@store)
253
+ br = BitReaper.new(gotoUrl,child)
254
+ store.merge! br.process()
207
255
  end
208
256
  else
209
- # it's a "section"
210
- store[tag] = {}
211
- if pattern.nil?
212
- subnoko = noko
257
+ if child.children.count==0
258
+ # no children, so it's a "get"
259
+ values = noko.search(pattern)
260
+
261
+ if values.count>0
262
+ processed = self.processValues(values,attrs)
263
+ if processed!=false
264
+ store[tag] = processed
265
+ end
266
+ end
213
267
  else
214
- subnoko = noko.search(pattern)
268
+ # it's a "section"
269
+ store[tag] = {}
270
+
271
+ if pattern.nil?
272
+ subnoko = noko
273
+ else
274
+ subnoko = noko.search(pattern)
275
+ end
276
+
277
+ processNode(subnoko,child,store[tag],level+1)
278
+ store[tag] = self.processValues(store[tag],attrs)
215
279
  end
216
- processNode(subnoko,child,store[tag],level+1)
217
280
  end
218
281
  }
219
282
  end
@@ -48,6 +48,16 @@ class String
48
48
  end
49
49
  end
50
50
 
51
+ def squish(ha)
52
+ h = ha
53
+ h.each{ |key,val|
54
+ if not val.nil? and val.is_a? Hash
55
+ h[key] = squish(val)
56
+ end
57
+ }
58
+ h.values.then { |a, *b| a.zip *b }.map { |e| (h.keys.zip e).to_h }
59
+ end
60
+
51
61
  ## Core
52
62
 
53
63
  def printLogo
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bitreaper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dr.Kameleon