bliss 0.0.9 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.rdoc +5 -0
- data/README.rdoc +11 -7
- data/VERSION +1 -1
- data/bliss.gemspec +4 -2
- data/lib/bliss/constraint.rb +68 -33
- data/lib/bliss/format.rb +84 -56
- data/lib/bliss/parser.rb +36 -19
- data/lib/bliss/parser_machine.rb +70 -11
- data/spec.yml +23 -19
- data/spec/constraint_spec.rb +23 -0
- data/spec/format_spec.rb +20 -46
- data/spec/parser_spec.rb +26 -0
- data/test.rb +9 -4
- metadata +19 -17
data/CHANGELOG.rdoc
CHANGED
@@ -1,7 +1,12 @@
|
|
1
|
+
== 0.1.0
|
2
|
+
* Added support for (possible) "content_values" on Bliss::Format.
|
3
|
+
|
1
4
|
== 0.0.9
|
2
5
|
|
3
6
|
* Features
|
4
7
|
|
8
|
+
* Added on_timeout callback.
|
9
|
+
* Introduced Bliss::Format. Formats linked to a Bliss::Parser are checked during the parsing process. They are defined with a bunch of specs given as YAML.
|
5
10
|
* added support for Gzip (content type is autodetected).
|
6
11
|
* on_max_unhandled_bytes callback, which receives the number of bytes and a block to execute when that limit is reached.
|
7
12
|
* on_tag_close may be used without a specific tag name, and the block now handles the "current depth" array.
|
data/README.rdoc
CHANGED
@@ -3,26 +3,30 @@
|
|
3
3
|
require 'rubygems'
|
4
4
|
require 'bliss'
|
5
5
|
|
6
|
-
path = 'http://www.yourdomain.com/input.xml'
|
6
|
+
path = 'http://www.yourdomain.com/input.xml' # it supports tar.gz too!
|
7
7
|
|
8
|
-
parser = Bliss::
|
8
|
+
parser = Bliss::Parser.new(path, 'output.xml') # outputs read xml
|
9
9
|
count = 0
|
10
|
-
|
11
|
-
puts
|
10
|
+
parser.on_max_unhandled_bytes(20000) {
|
11
|
+
puts 'Stopped parsing caused content data for tag was too big!'
|
12
|
+
parser.close
|
12
13
|
}
|
13
|
-
parser.on_tag_open('ad') { |depth|
|
14
|
+
parser.on_tag_open('ads/ad') { |depth|
|
14
15
|
puts depth.inspect
|
15
16
|
}
|
16
|
-
parser.on_tag_close('ad') { |hash|
|
17
|
+
parser.on_tag_close('ads/ad') { |hash, depth|
|
17
18
|
count += 1
|
18
19
|
puts hash.inspect
|
19
20
|
if count == 4
|
20
21
|
parser.close
|
21
22
|
end
|
22
23
|
}
|
24
|
+
|
25
|
+
parser.on_timeout(5) {
|
26
|
+
puts 'Timeout!'
|
27
|
+
}
|
23
28
|
|
24
29
|
parser.parse
|
25
|
-
puts "Root: #{parser.root}"
|
26
30
|
|
27
31
|
== Contributing to bliss
|
28
32
|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0
|
1
|
+
0.1.0
|
data/bliss.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "bliss"
|
8
|
-
s.version = "0.0
|
8
|
+
s.version = "0.1.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Fernando Alonso"]
|
12
|
-
s.date = "2012-06-
|
12
|
+
s.date = "2012-06-12"
|
13
13
|
s.description = "streamed xml parsing tool"
|
14
14
|
s.email = "krakatoa1987@gmail.com"
|
15
15
|
s.extra_rdoc_files = [
|
@@ -38,7 +38,9 @@ Gem::Specification.new do |s|
|
|
38
38
|
"lib/bliss/parser_machine.rb",
|
39
39
|
"lib/hash_extension.rb",
|
40
40
|
"spec.yml",
|
41
|
+
"spec/constraint_spec.rb",
|
41
42
|
"spec/format_spec.rb",
|
43
|
+
"spec/parser_spec.rb",
|
42
44
|
"spec/spec_helper.rb",
|
43
45
|
"test.rb",
|
44
46
|
"test/helper.rb",
|
data/lib/bliss/constraint.rb
CHANGED
@@ -1,30 +1,22 @@
|
|
1
1
|
module Bliss
|
2
2
|
class Constraint
|
3
|
-
|
4
|
-
attr_reader :
|
5
|
-
|
6
|
-
#TYPES = [:exist, :not_blank, :possible_values]
|
3
|
+
attr_accessor :depth, :possible_values
|
4
|
+
attr_reader :setting, :state
|
7
5
|
|
8
6
|
def initialize(depth, setting, params={})
|
9
7
|
@depth = depth
|
10
8
|
@setting = setting
|
9
|
+
@possible_values = params[:possible_values].collect(&:to_s) if params.has_key?(:possible_values)
|
11
10
|
|
12
11
|
@state = :not_checked
|
13
12
|
end
|
14
13
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
# else
|
19
|
-
# @field = [field]
|
20
|
-
# end
|
21
|
-
# @type = type
|
22
|
-
# @possible_values = possible_values
|
23
|
-
#
|
24
|
-
# @state = :not_checked
|
25
|
-
#end
|
14
|
+
def tag_names
|
15
|
+
@depth.split('/').last.gsub('(', '').gsub(')', '').split('|')
|
16
|
+
end
|
26
17
|
|
27
|
-
|
18
|
+
# TODO should exist another method passed! for tag_name_required ?
|
19
|
+
def run!(hash=nil)
|
28
20
|
@state = :not_checked
|
29
21
|
#@field.each do |field|
|
30
22
|
#if @state == :passed
|
@@ -32,38 +24,81 @@ module Bliss
|
|
32
24
|
#end
|
33
25
|
case @setting
|
34
26
|
when :tag_name_required
|
35
|
-
|
36
|
-
|
27
|
+
content = nil
|
28
|
+
if hash
|
29
|
+
#puts "#{@depth.inspect} - required: #{required.inspect}"
|
30
|
+
|
31
|
+
found = false
|
32
|
+
self.tag_names.each do |key|
|
33
|
+
if hash.keys.include?(key)
|
34
|
+
found = true
|
35
|
+
break
|
36
|
+
end
|
37
|
+
end
|
38
|
+
if found
|
39
|
+
@state = :passed
|
40
|
+
else
|
41
|
+
@state = :not_passed
|
42
|
+
end
|
37
43
|
else
|
38
44
|
@state = :passed
|
39
45
|
end
|
46
|
+
when :content_values
|
47
|
+
if hash
|
48
|
+
found = false
|
49
|
+
self.tag_names.each do |key|
|
50
|
+
content = hash[key]
|
51
|
+
puts content
|
52
|
+
puts @possible_values.inspect
|
53
|
+
if @possible_values.include?(content)
|
54
|
+
found = true
|
55
|
+
break
|
56
|
+
end
|
57
|
+
end
|
58
|
+
if found
|
59
|
+
@state = :passed
|
60
|
+
else
|
61
|
+
@state = :not_passed
|
62
|
+
end
|
63
|
+
end
|
40
64
|
#when :not_blank
|
41
65
|
# if hash.has_key?(field) and !hash[field].to_s.empty?
|
42
66
|
# @state = :passed
|
43
67
|
# else
|
44
68
|
# @state = :not_passed
|
45
69
|
# end
|
46
|
-
#when :possible_values
|
47
|
-
# if hash.has_key?(field) and @possible_values.include?(hash[field])
|
48
|
-
# @state = :passed
|
49
|
-
# else
|
50
|
-
# @state = :not_passed
|
51
|
-
# end
|
52
70
|
end
|
53
71
|
#end
|
54
72
|
@state
|
55
73
|
end
|
56
74
|
|
75
|
+
def ended!
|
76
|
+
case @setting
|
77
|
+
when :tag_name_required
|
78
|
+
if @state == :not_checked
|
79
|
+
@state = :not_passed
|
80
|
+
end
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
57
84
|
def detail
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
85
|
+
self.ended! # TODO esto es una chota de codigo groncho!
|
86
|
+
|
87
|
+
case @state
|
88
|
+
when :not_passed
|
89
|
+
case @setting
|
90
|
+
when :tag_name_required
|
91
|
+
[@depth, "missing"]
|
92
|
+
#when :not_blank
|
93
|
+
# [@field.join(" or "), "blank"]
|
94
|
+
#when :possible_values
|
95
|
+
# [@field.join(" or "), "invalid"]
|
96
|
+
end
|
97
|
+
when :passed
|
98
|
+
case @setting
|
99
|
+
when :tag_name_required
|
100
|
+
[@depth, "exists"]
|
101
|
+
end
|
67
102
|
end
|
68
103
|
end
|
69
104
|
|
data/lib/bliss/format.rb
CHANGED
@@ -4,9 +4,8 @@ module Bliss
|
|
4
4
|
class Format
|
5
5
|
@@keywords = %w{ tag_name_required content_required tag_name_type content_type tag_name_format content_format tag_name_values content_values }
|
6
6
|
|
7
|
-
def initialize
|
8
|
-
|
9
|
-
self.specifications = yml
|
7
|
+
def initialize(filepath)
|
8
|
+
self.specifications = YAML.load_file(filepath)
|
10
9
|
end
|
11
10
|
|
12
11
|
# TODO for debugging only!
|
@@ -21,77 +20,106 @@ module Bliss
|
|
21
20
|
|
22
21
|
def constraints
|
23
22
|
return [] if not (@specs.is_a? Hash and @specs.size > 0)
|
23
|
+
return @constraints if @constraints
|
24
24
|
|
25
|
-
constraints = []
|
25
|
+
@constraints = []
|
26
26
|
|
27
27
|
@specs.recurse(true) do |depth, value|
|
28
|
-
if !@@keywords.include?(depth.last)
|
29
|
-
settings =
|
28
|
+
if value.is_a? Hash and !@@keywords.include?(depth.last)
|
29
|
+
settings = value.select { |key| @@keywords.include?(key) }
|
30
30
|
end
|
31
|
-
|
31
|
+
#settings = @specs.value_at_chain(depth).select{|key| @@keywords.include?(key) }
|
32
|
+
if settings.is_a? Hash and !@@keywords.include?(depth.last)
|
32
33
|
settings.merge!({"tag_name_required" => true}) if not settings.has_key?("tag_name_required")
|
33
34
|
|
34
|
-
|
35
|
+
# TODO this is an ugly way to move tag_name_values to the end!
|
36
|
+
settings.store('tag_name_values', settings.delete('tag_name_values')) if settings.has_key?('tag_name_values')
|
37
|
+
settings.store('content_values', settings.delete('content_values')) if settings.has_key?('content_values')
|
35
38
|
|
36
|
-
#
|
39
|
+
#puts settings.inspect
|
37
40
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
end
|
45
|
-
}
|
46
|
-
|
47
|
-
#required_fields.each do |field|
|
48
|
-
# constraints.concat(Sumavisos::Parsers::Constraint.build_constraint(field, [:exist, :not_blank]).dup)
|
49
|
-
#end
|
41
|
+
#depth_name = nil
|
42
|
+
#setting_to_constraints(depth, settings).each { |cc|
|
43
|
+
#cc.depth = depth_name
|
44
|
+
# @constraints.push(cc) #Bliss::Constraint.new(depth_name, cc.setting))
|
45
|
+
#}
|
46
|
+
@constraints.concat(Bliss::Format.settings_to_constraints(depth, settings))
|
50
47
|
|
51
|
-
###
|
52
|
-
|
53
|
-
#puts "#{depth.join('/')}: #{settings.inspect}"
|
54
48
|
end
|
55
49
|
end
|
56
50
|
|
57
|
-
|
58
|
-
|
59
|
-
return constraints
|
51
|
+
return @constraints
|
60
52
|
end
|
61
|
-
|
62
|
-
# during parsing
|
63
|
-
# Sumavisos::Parsers::Validator.check_constraints(ad, constraints.select{|c| [:not_checked, :passed].include?(c.state)})
|
64
|
-
|
65
|
-
# @constraints.select{|c| c.state == :not_passed }.collect(&:detail)
|
66
|
-
|
67
|
-
def ad_constraints(root, vertical)
|
68
|
-
#required_fields = Sumavisos::Parsers::Validator::FIELDS['all']['required'].dup
|
69
|
-
#required_fields.concat(Sumavisos::Parsers::Validator::FIELDS[vertical]['required'])
|
70
|
-
|
71
|
-
#constraints = []
|
72
|
-
#required_fields.each do |field|
|
73
|
-
# constraints.concat(Sumavisos::Parsers::Constraint.build_constraint(field, [:exist, :not_blank]).dup)
|
74
|
-
#end
|
75
|
-
|
76
|
-
if vertical == 'property'
|
77
|
-
constraints.concat(Sumavisos::Parsers::Constraint.build_constraint(['type'], [:possible_values], Sumavisos::Parsers::Validator::VALID_PROPERTY_TYPES).dup)
|
78
|
-
end
|
79
53
|
|
80
|
-
|
54
|
+
def self.settings_to_constraints(depth, settings)
|
55
|
+
# TODO perhaps the Constraint model should handle this
|
56
|
+
# e.g., constraint.add_depth (as array)
|
57
|
+
# then internally it creates xpath-like depth
|
58
|
+
|
59
|
+
current_constraints = []
|
60
|
+
depth_name = nil
|
61
|
+
content_values = nil
|
62
|
+
#puts "#{depth.join('/')}: #{settings.inspect}"
|
63
|
+
settings.each_pair { |setting, value|
|
64
|
+
case setting
|
65
|
+
when "tag_name_required"
|
66
|
+
if value == true
|
67
|
+
depth_name ||= depth.join('/')
|
68
|
+
current_constraints.push(Bliss::Constraint.new(depth_name, :tag_name_required))
|
69
|
+
end
|
70
|
+
when "tag_name_values"
|
71
|
+
depth_name = depth[0..-2].join('/')
|
72
|
+
depth_name << "/" if depth_name.size > 0
|
73
|
+
depth_name << "(#{value.join('|')})" # TODO esto funciona solo en el ultimo step del depth :/
|
74
|
+
when "content_values"
|
75
|
+
current_constraints.push(Bliss::Constraint.new(depth_name, :content_values, {:possible_values => value}))
|
76
|
+
end
|
77
|
+
}
|
78
|
+
current_constraints.each {|cc|
|
79
|
+
cc.depth = depth_name
|
80
|
+
}
|
81
|
+
current_constraints
|
81
82
|
end
|
82
83
|
|
83
|
-
def
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
84
|
+
#def open_tag_constraints(depth)
|
85
|
+
# # raise error if not depth.is_a? Array
|
86
|
+
# begin
|
87
|
+
# to_check_constraints = self.to_check_constraints.select {|c| [:tag_name_required].include?(c.setting) }.select {|c| Regexp.new(c.depth).match(depth) }
|
88
|
+
# rescue
|
89
|
+
# []
|
90
|
+
# end
|
91
|
+
#end
|
92
|
+
|
93
|
+
#def close_tag_constraints(depth)
|
94
|
+
# # raise error if not depth.is_a? Array
|
95
|
+
# begin
|
96
|
+
# to_check_constraints = self.to_check_constraints.select {|c| Regexp.new(c.depth.split('/')[0..-2].join('/')).match(depth) }
|
97
|
+
# rescue
|
98
|
+
# []
|
99
|
+
# end
|
100
|
+
#end
|
101
|
+
|
102
|
+
# constraint set model? constraints.valid.with_depth(['root', 'ads']) ???
|
103
|
+
def to_check_constraints
|
104
|
+
# raise error if not depth.is_a? Array
|
105
|
+
begin
|
106
|
+
to_check_constraints = constraints.select {|c| [:not_checked, :passed].include?(c.state) }
|
107
|
+
to_check_constraints
|
108
|
+
rescue
|
109
|
+
[]
|
92
110
|
end
|
111
|
+
end
|
93
112
|
|
94
|
-
|
113
|
+
def details
|
114
|
+
@constraints.collect(&:detail)
|
95
115
|
end
|
116
|
+
|
117
|
+
def error_details
|
118
|
+
@constraints.select {|c| c.state == :not_passed }.collect(&:detail)
|
119
|
+
end
|
120
|
+
|
121
|
+
# reset_constraints_state
|
122
|
+
# build_constraints
|
123
|
+
|
96
124
|
end
|
97
125
|
end
|
data/lib/bliss/parser.rb
CHANGED
@@ -14,10 +14,26 @@ module Bliss
|
|
14
14
|
|
15
15
|
@root = nil
|
16
16
|
@nodes = nil
|
17
|
+
@formats = []
|
17
18
|
|
18
19
|
on_root {}
|
19
20
|
end
|
20
21
|
|
22
|
+
def add_format(format)
|
23
|
+
@formats.push(format)
|
24
|
+
end
|
25
|
+
|
26
|
+
def load_constraints_on_parser_machine
|
27
|
+
@parser_machine.constraints(@formats.collect(&:constraints).flatten)
|
28
|
+
end
|
29
|
+
|
30
|
+
def formats_details
|
31
|
+
@formats.each do |format|
|
32
|
+
puts format.details.inspect
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
# deprecate this, use depth at on_tag_open or on_tag_close instead
|
21
37
|
def on_root(&block)
|
22
38
|
return false if not block.is_a? Proc
|
23
39
|
@parser_machine.on_root { |root|
|
@@ -26,23 +42,23 @@ module Bliss
|
|
26
42
|
}
|
27
43
|
end
|
28
44
|
|
29
|
-
def on_tag_open(element='
|
45
|
+
def on_tag_open(element='.', &block)
|
30
46
|
return false if block.arity != 1
|
31
47
|
|
32
48
|
overriden_block = Proc.new { |depth|
|
33
49
|
if not element == 'default'
|
34
50
|
reset_unhandled_bytes
|
35
51
|
end
|
52
|
+
|
36
53
|
block.call(depth)
|
37
54
|
}
|
38
55
|
@parser_machine.on_tag_open(element, overriden_block)
|
39
56
|
end
|
40
57
|
|
41
|
-
def on_tag_close(element='
|
58
|
+
def on_tag_close(element='.', &block)
|
42
59
|
overriden_block = Proc.new { |hash, depth|
|
43
|
-
|
44
|
-
|
45
|
-
#end
|
60
|
+
reset_unhandled_bytes
|
61
|
+
|
46
62
|
block.call(hash, depth)
|
47
63
|
}
|
48
64
|
@parser_machine.on_tag_close(element, overriden_block)
|
@@ -53,6 +69,11 @@ module Bliss
|
|
53
69
|
@on_max_unhandled_bytes = block
|
54
70
|
end
|
55
71
|
|
72
|
+
def on_timeout(seconds, &block)
|
73
|
+
@timeout = seconds
|
74
|
+
@on_timeout = block
|
75
|
+
end
|
76
|
+
|
56
77
|
def wait_tag_close(element)
|
57
78
|
@wait_tag_close = "</#{element}>"
|
58
79
|
end
|
@@ -68,7 +89,6 @@ module Bliss
|
|
68
89
|
@on_max_unhandled_bytes.call
|
69
90
|
@on_max_unhandled_bytes = nil
|
70
91
|
end
|
71
|
-
#self.close
|
72
92
|
end
|
73
93
|
end
|
74
94
|
|
@@ -93,9 +113,15 @@ module Bliss
|
|
93
113
|
|
94
114
|
def parse
|
95
115
|
reset_unhandled_bytes if check_unhandled_bytes?
|
116
|
+
load_constraints_on_parser_machine
|
96
117
|
|
97
118
|
EM.run do
|
98
|
-
http =
|
119
|
+
http = nil
|
120
|
+
if @timeout
|
121
|
+
http = EM::HttpRequest.new(@path, :connect_timeout => @timeout, :inactivity_timeout => @timeout).get
|
122
|
+
else
|
123
|
+
http = EM::HttpRequest.new(@path).get
|
124
|
+
end
|
99
125
|
|
100
126
|
@autodetect_compression = true
|
101
127
|
compression = :none
|
@@ -154,6 +180,9 @@ module Bliss
|
|
154
180
|
}
|
155
181
|
http.errback {
|
156
182
|
#puts 'errback'
|
183
|
+
if @timeout
|
184
|
+
@on_timeout.call
|
185
|
+
end
|
157
186
|
secure_close
|
158
187
|
}
|
159
188
|
http.callback {
|
@@ -167,12 +196,6 @@ module Bliss
|
|
167
196
|
file_close
|
168
197
|
end
|
169
198
|
|
170
|
-
def autodetect_compression(http)
|
171
|
-
#compression = :none
|
172
|
-
puts compression
|
173
|
-
return compression
|
174
|
-
end
|
175
|
-
|
176
199
|
def handle_wait_tag_close(chunk)
|
177
200
|
begin
|
178
201
|
last_index = chunk.index(@wait_tag_close)
|
@@ -209,9 +232,3 @@ module Bliss
|
|
209
232
|
|
210
233
|
end
|
211
234
|
end
|
212
|
-
|
213
|
-
#require 'stringio'
|
214
|
-
#str = StringIO.new
|
215
|
-
#z = Zlib::GzipWriter.new(str)
|
216
|
-
#z.write(txt)
|
217
|
-
#z.close
|
data/lib/bliss/parser_machine.rb
CHANGED
@@ -13,20 +13,28 @@ module Bliss
|
|
13
13
|
@on_tag_open = {}
|
14
14
|
@on_tag_close = {}
|
15
15
|
|
16
|
+
@constraints = []
|
17
|
+
|
16
18
|
@closed = false
|
17
19
|
|
18
20
|
end
|
19
21
|
|
22
|
+
def constraints(constraints)
|
23
|
+
@constraints = constraints
|
24
|
+
end
|
25
|
+
|
20
26
|
def on_root(&block)
|
21
27
|
@on_root = block
|
22
28
|
end
|
23
29
|
|
24
30
|
def on_tag_open(element, block)
|
25
|
-
@on_tag_open.merge!({element => block})
|
31
|
+
@on_tag_open.merge!({Regexp.new("#{element}$") => block})
|
26
32
|
end
|
27
33
|
|
28
34
|
def on_tag_close(element, block)
|
29
|
-
|
35
|
+
# TODO
|
36
|
+
# check how do we want to handle on_tag_close depths (xpath, array, another)
|
37
|
+
@on_tag_close.merge!({Regexp.new("#{element}$") => block})
|
30
38
|
end
|
31
39
|
|
32
40
|
def close
|
@@ -50,10 +58,19 @@ module Bliss
|
|
50
58
|
|
51
59
|
@depth.push(element) if @depth.last != element
|
52
60
|
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
61
|
+
# TODO search on hash with xpath style
|
62
|
+
# for example:
|
63
|
+
# keys: */ad/url
|
64
|
+
# keys: root/ad/url
|
65
|
+
# @on_tag_close.keys.select {|key| @depth.match(key)}
|
66
|
+
|
67
|
+
# other example:
|
68
|
+
# keys: root/(ad|AD)/description
|
69
|
+
##
|
70
|
+
|
71
|
+
search_key = @depth.join('/') # element
|
72
|
+
@on_tag_open.keys.select{ |r| search_key.match(r) }.each do |reg|
|
73
|
+
@on_tag_open[reg].call(@depth)
|
57
74
|
end
|
58
75
|
|
59
76
|
current = @nodes.pair_at_chain(@depth)
|
@@ -81,6 +98,22 @@ module Bliss
|
|
81
98
|
@current_content = ''
|
82
99
|
end
|
83
100
|
|
101
|
+
=begin
|
102
|
+
def open_tag_regexps
|
103
|
+
return @open_tag_regexps if @open_tag_regexps
|
104
|
+
|
105
|
+
@open_tag_regexps = @on_tag_open.keys.collect {|key| Regexp.new(key) }
|
106
|
+
@open_tag_regexps
|
107
|
+
end
|
108
|
+
|
109
|
+
def close_tag_regexps
|
110
|
+
return @close_tag_regexps if @close_tag_regexps
|
111
|
+
|
112
|
+
@close_tag_regexps = @on_tag_close.keys.collect {|key| Regexp.new(key) }
|
113
|
+
@close_tag_regexps
|
114
|
+
end
|
115
|
+
=end
|
116
|
+
|
84
117
|
def characters(string)
|
85
118
|
return if is_closed?
|
86
119
|
concat_content(string)
|
@@ -107,11 +140,37 @@ module Bliss
|
|
107
140
|
end
|
108
141
|
end
|
109
142
|
@current_content = ''
|
143
|
+
|
144
|
+
# TODO search on hash with xpath style
|
145
|
+
# for example:
|
146
|
+
# keys: */ad/url
|
147
|
+
# keys: root/ad/url
|
148
|
+
# @on_tag_close.keys.select {|key| @depth.match(key)}
|
149
|
+
##
|
150
|
+
|
151
|
+
search_key = @depth.join('/') # element
|
110
152
|
|
111
|
-
if @
|
112
|
-
|
113
|
-
|
114
|
-
@
|
153
|
+
if @depth.last == 'ad'
|
154
|
+
#puts search_key
|
155
|
+
#puts value_at.keys.inspect
|
156
|
+
#ad array #puts @constraints.select{|c| search_key.match(Regexp.new("#{c.depth.split('/').join('/')}$"))}.inspect
|
157
|
+
#puts current.keys.inspect
|
158
|
+
# others puts @constraints.select{|c| search_key.match(Regexp.new("#{c.depth.split('/')[0..-2].join('/')}$"))}.inspect
|
159
|
+
end
|
160
|
+
|
161
|
+
@on_tag_close.keys.select{ |r| search_key.match(r) }.each do |reg|
|
162
|
+
@on_tag_close[reg].call(value_at, @depth)
|
163
|
+
end
|
164
|
+
# TODO constraint should return Regexp like depth too
|
165
|
+
|
166
|
+
#puts @constraints.collect(&:state).inspect
|
167
|
+
|
168
|
+
@constraints.select{|c| [:not_checked, :passed].include?(c.state) }.select {|c| search_key.match(Regexp.new("#{c.depth.split('/').join('/')}$")) }.each do |constraint|
|
169
|
+
#puts "search_key: #{search_key}"
|
170
|
+
#puts "value_at.inspect: #{value_at.inspect}"
|
171
|
+
#puts "current.inspect: #{current.inspect}"
|
172
|
+
|
173
|
+
constraint.run!(current)
|
115
174
|
end
|
116
175
|
|
117
176
|
@depth.pop if @depth.last == element
|
@@ -125,7 +184,7 @@ module Bliss
|
|
125
184
|
end
|
126
185
|
|
127
186
|
def end_document
|
128
|
-
puts @nodes.inspect
|
187
|
+
#puts @nodes.inspect
|
129
188
|
end
|
130
189
|
end
|
131
190
|
end
|
data/spec.yml
CHANGED
@@ -1,23 +1,27 @@
|
|
1
1
|
# TODO content_type = url
|
2
2
|
---
|
3
|
-
|
3
|
+
trovit:
|
4
|
+
tag_name_values: [ root, trovit, sumavisos ]
|
5
|
+
ad: &ad
|
6
|
+
id:
|
7
|
+
content_type: numeric
|
8
|
+
description:
|
9
|
+
tag_name_values: [ description, content ]
|
10
|
+
content_type: string
|
11
|
+
pictures:
|
12
|
+
tag_name_required: false
|
13
|
+
picture:
|
14
|
+
tag_name_required: false
|
15
|
+
picture_url:
|
16
|
+
tag_name_required: true
|
17
|
+
tag_name_values: [ url, picture_url ]
|
18
|
+
content_type: string
|
19
|
+
content_format: /http:\/\/\w+/
|
20
|
+
url:
|
21
|
+
content_format: /http:\/\/\w+/
|
22
|
+
date:
|
23
|
+
content_type: date
|
4
24
|
ads:
|
5
|
-
|
25
|
+
tag_name_required: false
|
6
26
|
ad:
|
7
|
-
|
8
|
-
content_type: numeric
|
9
|
-
description:
|
10
|
-
tag_name_values: [ description, content ]
|
11
|
-
content_type: string
|
12
|
-
pictures:
|
13
|
-
tag_name_required: false
|
14
|
-
picture:
|
15
|
-
tag_name_required: false
|
16
|
-
url:
|
17
|
-
tag_name_required: true
|
18
|
-
content_type: string
|
19
|
-
content_format: /http:\/\/\w+/
|
20
|
-
url:
|
21
|
-
content_format: /http:\/\/\w+/
|
22
|
-
date:
|
23
|
-
content_type: date
|
27
|
+
<<: *ad
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Bliss::Constraint do
|
4
|
+
describe 'run!' do
|
5
|
+
it 'should pass' do
|
6
|
+
constraint = Bliss::Constraint.new("root", :tag_name_required)
|
7
|
+
constraint.run!({'root' => {'tag_1' => 'test', 'tag_2' => 'test'}})
|
8
|
+
constraint.state.should == :passed
|
9
|
+
end
|
10
|
+
|
11
|
+
it 'should pass too' do
|
12
|
+
constraint = Bliss::Constraint.new("(root|ROOT)", :tag_name_required)
|
13
|
+
constraint.run!({'ROOT' => {'tag_1' => 'test', 'tag_2' => 'test'}})
|
14
|
+
constraint.state.should == :passed
|
15
|
+
end
|
16
|
+
|
17
|
+
it 'should not pass' do
|
18
|
+
constraint = Bliss::Constraint.new("(root|ROOT)", :tag_name_required)
|
19
|
+
constraint.run!({'another' => {'tag_1' => 'test', 'tag_2' => 'test'}})
|
20
|
+
constraint.state.should == :not_passed
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
data/spec/format_spec.rb
CHANGED
@@ -4,60 +4,34 @@ require 'spec_helper'
|
|
4
4
|
|
5
5
|
describe Bliss::Format do
|
6
6
|
before do
|
7
|
-
#@openx_banner = mock(OpenX::Services::Banner)
|
8
7
|
@format = Bliss::Format.new
|
9
8
|
end
|
10
9
|
|
11
10
|
describe '.constraints' do
|
12
|
-
#before do
|
13
|
-
#end
|
14
|
-
|
15
11
|
it 'should do it' do
|
16
|
-
@format.constraints.
|
12
|
+
@format.constraints.should be_a(Array)
|
13
|
+
#@format.constraints.size.should == 8
|
17
14
|
end
|
18
15
|
end
|
19
16
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
# it 'should return statistics' do
|
28
|
-
# @banner.traffic(Date.today, Date.today).should be_kind_of(Hash)
|
29
|
-
# end
|
30
|
-
#end
|
31
|
-
|
32
|
-
# describe '.created' do
|
33
|
-
# context 'when last creation is less than 2 days ago' do
|
34
|
-
# before do
|
35
|
-
# @site.stub(:last_creation_day_in_week) { Date.today - 1 }
|
36
|
-
# end
|
37
|
-
|
38
|
-
# it 'should be ok' do
|
39
|
-
# @site_evaluation.created[@site.id]['created'][0].should == 'ok'
|
40
|
-
# end
|
41
|
-
# end
|
42
|
-
|
43
|
-
# context 'when last creation is between 2 and 7 days ago' do
|
44
|
-
# before do
|
45
|
-
# @site.stub(:last_creation_day_in_week) { Date.today - 3 }
|
46
|
-
# end
|
47
|
-
|
48
|
-
# it 'should be a warning' do
|
49
|
-
# @site_evaluation.created[@site.id]['created'][0].should == 'warning'
|
50
|
-
# end
|
51
|
-
# end
|
17
|
+
describe '#settings_to_constraints' do
|
18
|
+
it 'should return an array with a Bliss::Constraint object' do
|
19
|
+
constraints = Bliss::Format.settings_to_constraints(['root'], {'tag_name_required' => true})
|
20
|
+
constraints.should be_a(Array)
|
21
|
+
constraints.size.should == 1
|
22
|
+
constraints.first.should be_a(Bliss::Constraint)
|
23
|
+
end
|
52
24
|
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
25
|
+
it 'should have depth and setting loaded' do
|
26
|
+
constraints = Bliss::Format.settings_to_constraints(['root'], {'tag_name_required' => true})
|
27
|
+
constraints.first.depth.should == 'root'
|
28
|
+
constraints.first.setting.should == :tag_name_required
|
29
|
+
end
|
57
30
|
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
31
|
+
it 'should have multiple depths' do
|
32
|
+
constraints = Bliss::Format.settings_to_constraints(['root'], {'tag_name_required' => true, 'tag_name_values' => ['root', 'ROOT']})
|
33
|
+
constraints.first.depth.should == '(root|ROOT)'
|
34
|
+
constraints.first.setting.should == :tag_name_required
|
35
|
+
end
|
36
|
+
end
|
63
37
|
end
|
data/spec/parser_spec.rb
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
#require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
3
|
+
#require_dependency 'xmlrpc/client'
|
4
|
+
|
5
|
+
describe Bliss::Parser do
|
6
|
+
before do
|
7
|
+
@parser = Bliss::Parser.new('http://www.topdiffusion.com/flux/topdiffusion_adsdeck.xml')
|
8
|
+
@format = Bliss::Format.new
|
9
|
+
@parser.add_format(@format)
|
10
|
+
end
|
11
|
+
|
12
|
+
context 'when parsing a valid document' do
|
13
|
+
before do
|
14
|
+
begin
|
15
|
+
@parser.parse
|
16
|
+
rescue
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
describe '.formats_details' do
|
21
|
+
it 'should have all required keys as existing' do
|
22
|
+
puts @parser.formats_details.inspect
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
data/test.rb
CHANGED
@@ -3,14 +3,17 @@ require 'bliss'
|
|
3
3
|
|
4
4
|
p = Bliss::Parser.new('', 'output.xml')
|
5
5
|
p.wait_tag_close('ad')
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
#}
|
6
|
+
p.on_max_unhandled_bytes(20000) {
|
7
|
+
puts 'Unhandled bytes!'
|
8
|
+
}
|
10
9
|
|
11
10
|
@count = 0
|
12
11
|
@makes = 0
|
13
12
|
|
13
|
+
f = Bliss::Format.new
|
14
|
+
|
15
|
+
p.add_format(f)
|
16
|
+
|
14
17
|
p.on_tag_close('ad') { |hash, depth|
|
15
18
|
if hash.has_key?('make')
|
16
19
|
@makes += 1
|
@@ -45,3 +48,5 @@ end
|
|
45
48
|
|
46
49
|
puts @count
|
47
50
|
puts @makes
|
51
|
+
|
52
|
+
puts p.formats_details.inspect
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bliss
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-06-
|
12
|
+
date: 2012-06-12 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
16
|
-
requirement: &
|
16
|
+
requirement: &8121280 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: 1.5.2
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *8121280
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: eventmachine
|
27
|
-
requirement: &
|
27
|
+
requirement: &8120500 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: 1.0.0.beta.4
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *8120500
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: em-http-request
|
38
|
-
requirement: &
|
38
|
+
requirement: &8119480 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ! '>='
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: 1.0.2
|
44
44
|
type: :runtime
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *8119480
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: rspec
|
49
|
-
requirement: &
|
49
|
+
requirement: &8118780 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ~>
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: 2.8.0
|
55
55
|
type: :development
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *8118780
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: bundler
|
60
|
-
requirement: &
|
60
|
+
requirement: &8117900 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ~>
|
@@ -65,10 +65,10 @@ dependencies:
|
|
65
65
|
version: 1.1.3
|
66
66
|
type: :development
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *8117900
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: jeweler
|
71
|
-
requirement: &
|
71
|
+
requirement: &8132400 !ruby/object:Gem::Requirement
|
72
72
|
none: false
|
73
73
|
requirements:
|
74
74
|
- - ~>
|
@@ -76,10 +76,10 @@ dependencies:
|
|
76
76
|
version: 1.6.4
|
77
77
|
type: :development
|
78
78
|
prerelease: false
|
79
|
-
version_requirements: *
|
79
|
+
version_requirements: *8132400
|
80
80
|
- !ruby/object:Gem::Dependency
|
81
81
|
name: simplecov
|
82
|
-
requirement: &
|
82
|
+
requirement: &8131260 !ruby/object:Gem::Requirement
|
83
83
|
none: false
|
84
84
|
requirements:
|
85
85
|
- - ! '>='
|
@@ -87,7 +87,7 @@ dependencies:
|
|
87
87
|
version: '0'
|
88
88
|
type: :development
|
89
89
|
prerelease: false
|
90
|
-
version_requirements: *
|
90
|
+
version_requirements: *8131260
|
91
91
|
description: streamed xml parsing tool
|
92
92
|
email: krakatoa1987@gmail.com
|
93
93
|
executables: []
|
@@ -117,7 +117,9 @@ files:
|
|
117
117
|
- lib/bliss/parser_machine.rb
|
118
118
|
- lib/hash_extension.rb
|
119
119
|
- spec.yml
|
120
|
+
- spec/constraint_spec.rb
|
120
121
|
- spec/format_spec.rb
|
122
|
+
- spec/parser_spec.rb
|
121
123
|
- spec/spec_helper.rb
|
122
124
|
- test.rb
|
123
125
|
- test/helper.rb
|
@@ -137,7 +139,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
137
139
|
version: '0'
|
138
140
|
segments:
|
139
141
|
- 0
|
140
|
-
hash:
|
142
|
+
hash: -4543548141741406741
|
141
143
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
142
144
|
none: false
|
143
145
|
requirements:
|