bliss 0.0.9 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.rdoc +5 -0
- data/README.rdoc +11 -7
- data/VERSION +1 -1
- data/bliss.gemspec +4 -2
- data/lib/bliss/constraint.rb +68 -33
- data/lib/bliss/format.rb +84 -56
- data/lib/bliss/parser.rb +36 -19
- data/lib/bliss/parser_machine.rb +70 -11
- data/spec.yml +23 -19
- data/spec/constraint_spec.rb +23 -0
- data/spec/format_spec.rb +20 -46
- data/spec/parser_spec.rb +26 -0
- data/test.rb +9 -4
- metadata +19 -17
data/CHANGELOG.rdoc
CHANGED
@@ -1,7 +1,12 @@
|
|
1
|
+
== 0.1.0
|
2
|
+
* Added support for (possible) "content_values" on Bliss::Format.
|
3
|
+
|
1
4
|
== 0.0.9
|
2
5
|
|
3
6
|
* Features
|
4
7
|
|
8
|
+
* Added on_timeout callback.
|
9
|
+
* Introduced Bliss::Format objects which, when linked to a Bliss::Parser, are checked during the parsing process. They are defined by a set of specs given as YAML.
|
5
10
|
* added support for Gzip (content type is autodetected).
|
6
11
|
* on_max_unhandled_bytes callback, which receives the number of bytes and a block to execute when that limit is reached.
|
7
12
|
* on_tag_close may be used without a specific tag name, and its block now also handles the "current depth" array.
|
data/README.rdoc
CHANGED
@@ -3,26 +3,30 @@
|
|
3
3
|
require 'rubygems'
|
4
4
|
require 'bliss'
|
5
5
|
|
6
|
-
path = 'http://www.yourdomain.com/input.xml'
|
6
|
+
path = 'http://www.yourdomain.com/input.xml' # it supports tar.gz too!
|
7
7
|
|
8
|
-
parser = Bliss::
|
8
|
+
parser = Bliss::Parser.new(path, 'output.xml') # outputs read xml
|
9
9
|
count = 0
|
10
|
-
|
11
|
-
puts
|
10
|
+
parser.on_max_unhandled_bytes(20000) {
|
11
|
+
puts 'Stopped parsing caused content data for tag was too big!'
|
12
|
+
parser.close
|
12
13
|
}
|
13
|
-
parser.on_tag_open('ad') { |depth|
|
14
|
+
parser.on_tag_open('ads/ad') { |depth|
|
14
15
|
puts depth.inspect
|
15
16
|
}
|
16
|
-
parser.on_tag_close('ad') { |hash|
|
17
|
+
parser.on_tag_close('ads/ad') { |hash, depth|
|
17
18
|
count += 1
|
18
19
|
puts hash.inspect
|
19
20
|
if count == 4
|
20
21
|
parser.close
|
21
22
|
end
|
22
23
|
}
|
24
|
+
|
25
|
+
parser.on_timeout(5) {
|
26
|
+
puts 'Timeout!'
|
27
|
+
}
|
23
28
|
|
24
29
|
parser.parse
|
25
|
-
puts "Root: #{parser.root}"
|
26
30
|
|
27
31
|
== Contributing to bliss
|
28
32
|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0
|
1
|
+
0.1.0
|
data/bliss.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "bliss"
|
8
|
-
s.version = "0.0
|
8
|
+
s.version = "0.1.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Fernando Alonso"]
|
12
|
-
s.date = "2012-06-
|
12
|
+
s.date = "2012-06-12"
|
13
13
|
s.description = "streamed xml parsing tool"
|
14
14
|
s.email = "krakatoa1987@gmail.com"
|
15
15
|
s.extra_rdoc_files = [
|
@@ -38,7 +38,9 @@ Gem::Specification.new do |s|
|
|
38
38
|
"lib/bliss/parser_machine.rb",
|
39
39
|
"lib/hash_extension.rb",
|
40
40
|
"spec.yml",
|
41
|
+
"spec/constraint_spec.rb",
|
41
42
|
"spec/format_spec.rb",
|
43
|
+
"spec/parser_spec.rb",
|
42
44
|
"spec/spec_helper.rb",
|
43
45
|
"test.rb",
|
44
46
|
"test/helper.rb",
|
data/lib/bliss/constraint.rb
CHANGED
@@ -1,30 +1,22 @@
|
|
1
1
|
module Bliss
|
2
2
|
class Constraint
|
3
|
-
|
4
|
-
attr_reader :
|
5
|
-
|
6
|
-
#TYPES = [:exist, :not_blank, :possible_values]
|
3
|
+
attr_accessor :depth, :possible_values
|
4
|
+
attr_reader :setting, :state
|
7
5
|
|
8
6
|
def initialize(depth, setting, params={})
|
9
7
|
@depth = depth
|
10
8
|
@setting = setting
|
9
|
+
@possible_values = params[:possible_values].collect(&:to_s) if params.has_key?(:possible_values)
|
11
10
|
|
12
11
|
@state = :not_checked
|
13
12
|
end
|
14
13
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
# else
|
19
|
-
# @field = [field]
|
20
|
-
# end
|
21
|
-
# @type = type
|
22
|
-
# @possible_values = possible_values
|
23
|
-
#
|
24
|
-
# @state = :not_checked
|
25
|
-
#end
|
14
|
+
def tag_names
|
15
|
+
@depth.split('/').last.gsub('(', '').gsub(')', '').split('|')
|
16
|
+
end
|
26
17
|
|
27
|
-
|
18
|
+
# TODO should exist another method passed! for tag_name_required ?
|
19
|
+
def run!(hash=nil)
|
28
20
|
@state = :not_checked
|
29
21
|
#@field.each do |field|
|
30
22
|
#if @state == :passed
|
@@ -32,38 +24,81 @@ module Bliss
|
|
32
24
|
#end
|
33
25
|
case @setting
|
34
26
|
when :tag_name_required
|
35
|
-
|
36
|
-
|
27
|
+
content = nil
|
28
|
+
if hash
|
29
|
+
#puts "#{@depth.inspect} - required: #{required.inspect}"
|
30
|
+
|
31
|
+
found = false
|
32
|
+
self.tag_names.each do |key|
|
33
|
+
if hash.keys.include?(key)
|
34
|
+
found = true
|
35
|
+
break
|
36
|
+
end
|
37
|
+
end
|
38
|
+
if found
|
39
|
+
@state = :passed
|
40
|
+
else
|
41
|
+
@state = :not_passed
|
42
|
+
end
|
37
43
|
else
|
38
44
|
@state = :passed
|
39
45
|
end
|
46
|
+
when :content_values
|
47
|
+
if hash
|
48
|
+
found = false
|
49
|
+
self.tag_names.each do |key|
|
50
|
+
content = hash[key]
|
51
|
+
puts content
|
52
|
+
puts @possible_values.inspect
|
53
|
+
if @possible_values.include?(content)
|
54
|
+
found = true
|
55
|
+
break
|
56
|
+
end
|
57
|
+
end
|
58
|
+
if found
|
59
|
+
@state = :passed
|
60
|
+
else
|
61
|
+
@state = :not_passed
|
62
|
+
end
|
63
|
+
end
|
40
64
|
#when :not_blank
|
41
65
|
# if hash.has_key?(field) and !hash[field].to_s.empty?
|
42
66
|
# @state = :passed
|
43
67
|
# else
|
44
68
|
# @state = :not_passed
|
45
69
|
# end
|
46
|
-
#when :possible_values
|
47
|
-
# if hash.has_key?(field) and @possible_values.include?(hash[field])
|
48
|
-
# @state = :passed
|
49
|
-
# else
|
50
|
-
# @state = :not_passed
|
51
|
-
# end
|
52
70
|
end
|
53
71
|
#end
|
54
72
|
@state
|
55
73
|
end
|
56
74
|
|
75
|
+
def ended!
|
76
|
+
case @setting
|
77
|
+
when :tag_name_required
|
78
|
+
if @state == :not_checked
|
79
|
+
@state = :not_passed
|
80
|
+
end
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
57
84
|
def detail
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
85
|
+
self.ended! # TODO esto es una chota de codigo groncho!
|
86
|
+
|
87
|
+
case @state
|
88
|
+
when :not_passed
|
89
|
+
case @setting
|
90
|
+
when :tag_name_required
|
91
|
+
[@depth, "missing"]
|
92
|
+
#when :not_blank
|
93
|
+
# [@field.join(" or "), "blank"]
|
94
|
+
#when :possible_values
|
95
|
+
# [@field.join(" or "), "invalid"]
|
96
|
+
end
|
97
|
+
when :passed
|
98
|
+
case @setting
|
99
|
+
when :tag_name_required
|
100
|
+
[@depth, "exists"]
|
101
|
+
end
|
67
102
|
end
|
68
103
|
end
|
69
104
|
|
data/lib/bliss/format.rb
CHANGED
@@ -4,9 +4,8 @@ module Bliss
|
|
4
4
|
class Format
|
5
5
|
@@keywords = %w{ tag_name_required content_required tag_name_type content_type tag_name_format content_format tag_name_values content_values }
|
6
6
|
|
7
|
-
def initialize
|
8
|
-
|
9
|
-
self.specifications = yml
|
7
|
+
def initialize(filepath)
|
8
|
+
self.specifications = YAML.load_file(filepath)
|
10
9
|
end
|
11
10
|
|
12
11
|
# TODO for debugging only!
|
@@ -21,77 +20,106 @@ module Bliss
|
|
21
20
|
|
22
21
|
def constraints
|
23
22
|
return [] if not (@specs.is_a? Hash and @specs.size > 0)
|
23
|
+
return @constraints if @constraints
|
24
24
|
|
25
|
-
constraints = []
|
25
|
+
@constraints = []
|
26
26
|
|
27
27
|
@specs.recurse(true) do |depth, value|
|
28
|
-
if !@@keywords.include?(depth.last)
|
29
|
-
settings =
|
28
|
+
if value.is_a? Hash and !@@keywords.include?(depth.last)
|
29
|
+
settings = value.select { |key| @@keywords.include?(key) }
|
30
30
|
end
|
31
|
-
|
31
|
+
#settings = @specs.value_at_chain(depth).select{|key| @@keywords.include?(key) }
|
32
|
+
if settings.is_a? Hash and !@@keywords.include?(depth.last)
|
32
33
|
settings.merge!({"tag_name_required" => true}) if not settings.has_key?("tag_name_required")
|
33
34
|
|
34
|
-
|
35
|
+
# TODO this is an ugly way to move tag_name_values to the end!
|
36
|
+
settings.store('tag_name_values', settings.delete('tag_name_values')) if settings.has_key?('tag_name_values')
|
37
|
+
settings.store('content_values', settings.delete('content_values')) if settings.has_key?('content_values')
|
35
38
|
|
36
|
-
#
|
39
|
+
#puts settings.inspect
|
37
40
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
end
|
45
|
-
}
|
46
|
-
|
47
|
-
#required_fields.each do |field|
|
48
|
-
# constraints.concat(Sumavisos::Parsers::Constraint.build_constraint(field, [:exist, :not_blank]).dup)
|
49
|
-
#end
|
41
|
+
#depth_name = nil
|
42
|
+
#setting_to_constraints(depth, settings).each { |cc|
|
43
|
+
#cc.depth = depth_name
|
44
|
+
# @constraints.push(cc) #Bliss::Constraint.new(depth_name, cc.setting))
|
45
|
+
#}
|
46
|
+
@constraints.concat(Bliss::Format.settings_to_constraints(depth, settings))
|
50
47
|
|
51
|
-
###
|
52
|
-
|
53
|
-
#puts "#{depth.join('/')}: #{settings.inspect}"
|
54
48
|
end
|
55
49
|
end
|
56
50
|
|
57
|
-
|
58
|
-
|
59
|
-
return constraints
|
51
|
+
return @constraints
|
60
52
|
end
|
61
|
-
|
62
|
-
# during parsing
|
63
|
-
# Sumavisos::Parsers::Validator.check_constraints(ad, constraints.select{|c| [:not_checked, :passed].include?(c.state)})
|
64
|
-
|
65
|
-
# @constraints.select{|c| c.state == :not_passed }.collect(&:detail)
|
66
|
-
|
67
|
-
def ad_constraints(root, vertical)
|
68
|
-
#required_fields = Sumavisos::Parsers::Validator::FIELDS['all']['required'].dup
|
69
|
-
#required_fields.concat(Sumavisos::Parsers::Validator::FIELDS[vertical]['required'])
|
70
|
-
|
71
|
-
#constraints = []
|
72
|
-
#required_fields.each do |field|
|
73
|
-
# constraints.concat(Sumavisos::Parsers::Constraint.build_constraint(field, [:exist, :not_blank]).dup)
|
74
|
-
#end
|
75
|
-
|
76
|
-
if vertical == 'property'
|
77
|
-
constraints.concat(Sumavisos::Parsers::Constraint.build_constraint(['type'], [:possible_values], Sumavisos::Parsers::Validator::VALID_PROPERTY_TYPES).dup)
|
78
|
-
end
|
79
53
|
|
80
|
-
|
54
|
+
def self.settings_to_constraints(depth, settings)
|
55
|
+
# TODO perhaps the Constraint model should handle this
|
56
|
+
# e.g., constraint.add_depth (as array)
|
57
|
+
# then internally it creates xpath-like depth
|
58
|
+
|
59
|
+
current_constraints = []
|
60
|
+
depth_name = nil
|
61
|
+
content_values = nil
|
62
|
+
#puts "#{depth.join('/')}: #{settings.inspect}"
|
63
|
+
settings.each_pair { |setting, value|
|
64
|
+
case setting
|
65
|
+
when "tag_name_required"
|
66
|
+
if value == true
|
67
|
+
depth_name ||= depth.join('/')
|
68
|
+
current_constraints.push(Bliss::Constraint.new(depth_name, :tag_name_required))
|
69
|
+
end
|
70
|
+
when "tag_name_values"
|
71
|
+
depth_name = depth[0..-2].join('/')
|
72
|
+
depth_name << "/" if depth_name.size > 0
|
73
|
+
depth_name << "(#{value.join('|')})" # TODO esto funciona solo en el ultimo step del depth :/
|
74
|
+
when "content_values"
|
75
|
+
current_constraints.push(Bliss::Constraint.new(depth_name, :content_values, {:possible_values => value}))
|
76
|
+
end
|
77
|
+
}
|
78
|
+
current_constraints.each {|cc|
|
79
|
+
cc.depth = depth_name
|
80
|
+
}
|
81
|
+
current_constraints
|
81
82
|
end
|
82
83
|
|
83
|
-
def
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
84
|
+
#def open_tag_constraints(depth)
|
85
|
+
# # raise error if not depth.is_a? Array
|
86
|
+
# begin
|
87
|
+
# to_check_constraints = self.to_check_constraints.select {|c| [:tag_name_required].include?(c.setting) }.select {|c| Regexp.new(c.depth).match(depth) }
|
88
|
+
# rescue
|
89
|
+
# []
|
90
|
+
# end
|
91
|
+
#end
|
92
|
+
|
93
|
+
#def close_tag_constraints(depth)
|
94
|
+
# # raise error if not depth.is_a? Array
|
95
|
+
# begin
|
96
|
+
# to_check_constraints = self.to_check_constraints.select {|c| Regexp.new(c.depth.split('/')[0..-2].join('/')).match(depth) }
|
97
|
+
# rescue
|
98
|
+
# []
|
99
|
+
# end
|
100
|
+
#end
|
101
|
+
|
102
|
+
# constraint set model? constraints.valid.with_depth(['root', 'ads']) ???
|
103
|
+
def to_check_constraints
|
104
|
+
# raise error if not depth.is_a? Array
|
105
|
+
begin
|
106
|
+
to_check_constraints = constraints.select {|c| [:not_checked, :passed].include?(c.state) }
|
107
|
+
to_check_constraints
|
108
|
+
rescue
|
109
|
+
[]
|
92
110
|
end
|
111
|
+
end
|
93
112
|
|
94
|
-
|
113
|
+
def details
|
114
|
+
@constraints.collect(&:detail)
|
95
115
|
end
|
116
|
+
|
117
|
+
def error_details
|
118
|
+
@constraints.select {|c| c.state == :not_passed }.collect(&:detail)
|
119
|
+
end
|
120
|
+
|
121
|
+
# reset_constraints_state
|
122
|
+
# build_constraints
|
123
|
+
|
96
124
|
end
|
97
125
|
end
|
data/lib/bliss/parser.rb
CHANGED
@@ -14,10 +14,26 @@ module Bliss
|
|
14
14
|
|
15
15
|
@root = nil
|
16
16
|
@nodes = nil
|
17
|
+
@formats = []
|
17
18
|
|
18
19
|
on_root {}
|
19
20
|
end
|
20
21
|
|
22
|
+
def add_format(format)
|
23
|
+
@formats.push(format)
|
24
|
+
end
|
25
|
+
|
26
|
+
def load_constraints_on_parser_machine
|
27
|
+
@parser_machine.constraints(@formats.collect(&:constraints).flatten)
|
28
|
+
end
|
29
|
+
|
30
|
+
def formats_details
|
31
|
+
@formats.each do |format|
|
32
|
+
puts format.details.inspect
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
# deprecate this, use depth at on_tag_open or on_tag_close instead
|
21
37
|
def on_root(&block)
|
22
38
|
return false if not block.is_a? Proc
|
23
39
|
@parser_machine.on_root { |root|
|
@@ -26,23 +42,23 @@ module Bliss
|
|
26
42
|
}
|
27
43
|
end
|
28
44
|
|
29
|
-
def on_tag_open(element='
|
45
|
+
def on_tag_open(element='.', &block)
|
30
46
|
return false if block.arity != 1
|
31
47
|
|
32
48
|
overriden_block = Proc.new { |depth|
|
33
49
|
if not element == 'default'
|
34
50
|
reset_unhandled_bytes
|
35
51
|
end
|
52
|
+
|
36
53
|
block.call(depth)
|
37
54
|
}
|
38
55
|
@parser_machine.on_tag_open(element, overriden_block)
|
39
56
|
end
|
40
57
|
|
41
|
-
def on_tag_close(element='
|
58
|
+
def on_tag_close(element='.', &block)
|
42
59
|
overriden_block = Proc.new { |hash, depth|
|
43
|
-
|
44
|
-
|
45
|
-
#end
|
60
|
+
reset_unhandled_bytes
|
61
|
+
|
46
62
|
block.call(hash, depth)
|
47
63
|
}
|
48
64
|
@parser_machine.on_tag_close(element, overriden_block)
|
@@ -53,6 +69,11 @@ module Bliss
|
|
53
69
|
@on_max_unhandled_bytes = block
|
54
70
|
end
|
55
71
|
|
72
|
+
def on_timeout(seconds, &block)
|
73
|
+
@timeout = seconds
|
74
|
+
@on_timeout = block
|
75
|
+
end
|
76
|
+
|
56
77
|
def wait_tag_close(element)
|
57
78
|
@wait_tag_close = "</#{element}>"
|
58
79
|
end
|
@@ -68,7 +89,6 @@ module Bliss
|
|
68
89
|
@on_max_unhandled_bytes.call
|
69
90
|
@on_max_unhandled_bytes = nil
|
70
91
|
end
|
71
|
-
#self.close
|
72
92
|
end
|
73
93
|
end
|
74
94
|
|
@@ -93,9 +113,15 @@ module Bliss
|
|
93
113
|
|
94
114
|
def parse
|
95
115
|
reset_unhandled_bytes if check_unhandled_bytes?
|
116
|
+
load_constraints_on_parser_machine
|
96
117
|
|
97
118
|
EM.run do
|
98
|
-
http =
|
119
|
+
http = nil
|
120
|
+
if @timeout
|
121
|
+
http = EM::HttpRequest.new(@path, :connect_timeout => @timeout, :inactivity_timeout => @timeout).get
|
122
|
+
else
|
123
|
+
http = EM::HttpRequest.new(@path).get
|
124
|
+
end
|
99
125
|
|
100
126
|
@autodetect_compression = true
|
101
127
|
compression = :none
|
@@ -154,6 +180,9 @@ module Bliss
|
|
154
180
|
}
|
155
181
|
http.errback {
|
156
182
|
#puts 'errback'
|
183
|
+
if @timeout
|
184
|
+
@on_timeout.call
|
185
|
+
end
|
157
186
|
secure_close
|
158
187
|
}
|
159
188
|
http.callback {
|
@@ -167,12 +196,6 @@ module Bliss
|
|
167
196
|
file_close
|
168
197
|
end
|
169
198
|
|
170
|
-
def autodetect_compression(http)
|
171
|
-
#compression = :none
|
172
|
-
puts compression
|
173
|
-
return compression
|
174
|
-
end
|
175
|
-
|
176
199
|
def handle_wait_tag_close(chunk)
|
177
200
|
begin
|
178
201
|
last_index = chunk.index(@wait_tag_close)
|
@@ -209,9 +232,3 @@ module Bliss
|
|
209
232
|
|
210
233
|
end
|
211
234
|
end
|
212
|
-
|
213
|
-
#require 'stringio'
|
214
|
-
#str = StringIO.new
|
215
|
-
#z = Zlib::GzipWriter.new(str)
|
216
|
-
#z.write(txt)
|
217
|
-
#z.close
|
data/lib/bliss/parser_machine.rb
CHANGED
@@ -13,20 +13,28 @@ module Bliss
|
|
13
13
|
@on_tag_open = {}
|
14
14
|
@on_tag_close = {}
|
15
15
|
|
16
|
+
@constraints = []
|
17
|
+
|
16
18
|
@closed = false
|
17
19
|
|
18
20
|
end
|
19
21
|
|
22
|
+
def constraints(constraints)
|
23
|
+
@constraints = constraints
|
24
|
+
end
|
25
|
+
|
20
26
|
def on_root(&block)
|
21
27
|
@on_root = block
|
22
28
|
end
|
23
29
|
|
24
30
|
def on_tag_open(element, block)
|
25
|
-
@on_tag_open.merge!({element => block})
|
31
|
+
@on_tag_open.merge!({Regexp.new("#{element}$") => block})
|
26
32
|
end
|
27
33
|
|
28
34
|
def on_tag_close(element, block)
|
29
|
-
|
35
|
+
# TODO
|
36
|
+
# check how do we want to handle on_tag_close depths (xpath, array, another)
|
37
|
+
@on_tag_close.merge!({Regexp.new("#{element}$") => block})
|
30
38
|
end
|
31
39
|
|
32
40
|
def close
|
@@ -50,10 +58,19 @@ module Bliss
|
|
50
58
|
|
51
59
|
@depth.push(element) if @depth.last != element
|
52
60
|
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
61
|
+
# TODO search on hash with xpath style
|
62
|
+
# for example:
|
63
|
+
# keys: */ad/url
|
64
|
+
# keys: root/ad/url
|
65
|
+
# @on_tag_close.keys.select {|key| @depth.match(key)}
|
66
|
+
|
67
|
+
# other example:
|
68
|
+
# keys: root/(ad|AD)/description
|
69
|
+
##
|
70
|
+
|
71
|
+
search_key = @depth.join('/') # element
|
72
|
+
@on_tag_open.keys.select{ |r| search_key.match(r) }.each do |reg|
|
73
|
+
@on_tag_open[reg].call(@depth)
|
57
74
|
end
|
58
75
|
|
59
76
|
current = @nodes.pair_at_chain(@depth)
|
@@ -81,6 +98,22 @@ module Bliss
|
|
81
98
|
@current_content = ''
|
82
99
|
end
|
83
100
|
|
101
|
+
=begin
|
102
|
+
def open_tag_regexps
|
103
|
+
return @open_tag_regexps if @open_tag_regexps
|
104
|
+
|
105
|
+
@open_tag_regexps = @on_tag_open.keys.collect {|key| Regexp.new(key) }
|
106
|
+
@open_tag_regexps
|
107
|
+
end
|
108
|
+
|
109
|
+
def close_tag_regexps
|
110
|
+
return @close_tag_regexps if @close_tag_regexps
|
111
|
+
|
112
|
+
@close_tag_regexps = @on_tag_close.keys.collect {|key| Regexp.new(key) }
|
113
|
+
@close_tag_regexps
|
114
|
+
end
|
115
|
+
=end
|
116
|
+
|
84
117
|
def characters(string)
|
85
118
|
return if is_closed?
|
86
119
|
concat_content(string)
|
@@ -107,11 +140,37 @@ module Bliss
|
|
107
140
|
end
|
108
141
|
end
|
109
142
|
@current_content = ''
|
143
|
+
|
144
|
+
# TODO search on hash with xpath style
|
145
|
+
# for example:
|
146
|
+
# keys: */ad/url
|
147
|
+
# keys: root/ad/url
|
148
|
+
# @on_tag_close.keys.select {|key| @depth.match(key)}
|
149
|
+
##
|
150
|
+
|
151
|
+
search_key = @depth.join('/') # element
|
110
152
|
|
111
|
-
if @
|
112
|
-
|
113
|
-
|
114
|
-
@
|
153
|
+
if @depth.last == 'ad'
|
154
|
+
#puts search_key
|
155
|
+
#puts value_at.keys.inspect
|
156
|
+
#ad array #puts @constraints.select{|c| search_key.match(Regexp.new("#{c.depth.split('/').join('/')}$"))}.inspect
|
157
|
+
#puts current.keys.inspect
|
158
|
+
# others puts @constraints.select{|c| search_key.match(Regexp.new("#{c.depth.split('/')[0..-2].join('/')}$"))}.inspect
|
159
|
+
end
|
160
|
+
|
161
|
+
@on_tag_close.keys.select{ |r| search_key.match(r) }.each do |reg|
|
162
|
+
@on_tag_close[reg].call(value_at, @depth)
|
163
|
+
end
|
164
|
+
# TODO constraint should return Regexp like depth too
|
165
|
+
|
166
|
+
#puts @constraints.collect(&:state).inspect
|
167
|
+
|
168
|
+
@constraints.select{|c| [:not_checked, :passed].include?(c.state) }.select {|c| search_key.match(Regexp.new("#{c.depth.split('/').join('/')}$")) }.each do |constraint|
|
169
|
+
#puts "search_key: #{search_key}"
|
170
|
+
#puts "value_at.inspect: #{value_at.inspect}"
|
171
|
+
#puts "current.inspect: #{current.inspect}"
|
172
|
+
|
173
|
+
constraint.run!(current)
|
115
174
|
end
|
116
175
|
|
117
176
|
@depth.pop if @depth.last == element
|
@@ -125,7 +184,7 @@ module Bliss
|
|
125
184
|
end
|
126
185
|
|
127
186
|
def end_document
|
128
|
-
puts @nodes.inspect
|
187
|
+
#puts @nodes.inspect
|
129
188
|
end
|
130
189
|
end
|
131
190
|
end
|
data/spec.yml
CHANGED
@@ -1,23 +1,27 @@
|
|
1
1
|
# TODO content_type = url
|
2
2
|
---
|
3
|
-
|
3
|
+
trovit:
|
4
|
+
tag_name_values: [ root, trovit, sumavisos ]
|
5
|
+
ad: &ad
|
6
|
+
id:
|
7
|
+
content_type: numeric
|
8
|
+
description:
|
9
|
+
tag_name_values: [ description, content ]
|
10
|
+
content_type: string
|
11
|
+
pictures:
|
12
|
+
tag_name_required: false
|
13
|
+
picture:
|
14
|
+
tag_name_required: false
|
15
|
+
picture_url:
|
16
|
+
tag_name_required: true
|
17
|
+
tag_name_values: [ url, picture_url ]
|
18
|
+
content_type: string
|
19
|
+
content_format: /http:\/\/\w+/
|
20
|
+
url:
|
21
|
+
content_format: /http:\/\/\w+/
|
22
|
+
date:
|
23
|
+
content_type: date
|
4
24
|
ads:
|
5
|
-
|
25
|
+
tag_name_required: false
|
6
26
|
ad:
|
7
|
-
|
8
|
-
content_type: numeric
|
9
|
-
description:
|
10
|
-
tag_name_values: [ description, content ]
|
11
|
-
content_type: string
|
12
|
-
pictures:
|
13
|
-
tag_name_required: false
|
14
|
-
picture:
|
15
|
-
tag_name_required: false
|
16
|
-
url:
|
17
|
-
tag_name_required: true
|
18
|
-
content_type: string
|
19
|
-
content_format: /http:\/\/\w+/
|
20
|
-
url:
|
21
|
-
content_format: /http:\/\/\w+/
|
22
|
-
date:
|
23
|
-
content_type: date
|
27
|
+
<<: *ad
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Bliss::Constraint do
|
4
|
+
describe 'run!' do
|
5
|
+
it 'should pass' do
|
6
|
+
constraint = Bliss::Constraint.new("root", :tag_name_required)
|
7
|
+
constraint.run!({'root' => {'tag_1' => 'test', 'tag_2' => 'test'}})
|
8
|
+
constraint.state.should == :passed
|
9
|
+
end
|
10
|
+
|
11
|
+
it 'should pass too' do
|
12
|
+
constraint = Bliss::Constraint.new("(root|ROOT)", :tag_name_required)
|
13
|
+
constraint.run!({'ROOT' => {'tag_1' => 'test', 'tag_2' => 'test'}})
|
14
|
+
constraint.state.should == :passed
|
15
|
+
end
|
16
|
+
|
17
|
+
it 'should not pass' do
|
18
|
+
constraint = Bliss::Constraint.new("(root|ROOT)", :tag_name_required)
|
19
|
+
constraint.run!({'another' => {'tag_1' => 'test', 'tag_2' => 'test'}})
|
20
|
+
constraint.state.should == :not_passed
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
data/spec/format_spec.rb
CHANGED
@@ -4,60 +4,34 @@ require 'spec_helper'
|
|
4
4
|
|
5
5
|
describe Bliss::Format do
|
6
6
|
before do
|
7
|
-
#@openx_banner = mock(OpenX::Services::Banner)
|
8
7
|
@format = Bliss::Format.new
|
9
8
|
end
|
10
9
|
|
11
10
|
describe '.constraints' do
|
12
|
-
#before do
|
13
|
-
#end
|
14
|
-
|
15
11
|
it 'should do it' do
|
16
|
-
@format.constraints.
|
12
|
+
@format.constraints.should be_a(Array)
|
13
|
+
#@format.constraints.size.should == 8
|
17
14
|
end
|
18
15
|
end
|
19
16
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
# it 'should return statistics' do
|
28
|
-
# @banner.traffic(Date.today, Date.today).should be_kind_of(Hash)
|
29
|
-
# end
|
30
|
-
#end
|
31
|
-
|
32
|
-
# describe '.created' do
|
33
|
-
# context 'when last creation is less than 2 days ago' do
|
34
|
-
# before do
|
35
|
-
# @site.stub(:last_creation_day_in_week) { Date.today - 1 }
|
36
|
-
# end
|
37
|
-
|
38
|
-
# it 'should be ok' do
|
39
|
-
# @site_evaluation.created[@site.id]['created'][0].should == 'ok'
|
40
|
-
# end
|
41
|
-
# end
|
42
|
-
|
43
|
-
# context 'when last creation is between 2 and 7 days ago' do
|
44
|
-
# before do
|
45
|
-
# @site.stub(:last_creation_day_in_week) { Date.today - 3 }
|
46
|
-
# end
|
47
|
-
|
48
|
-
# it 'should be a warning' do
|
49
|
-
# @site_evaluation.created[@site.id]['created'][0].should == 'warning'
|
50
|
-
# end
|
51
|
-
# end
|
17
|
+
describe '#settings_to_constraints' do
|
18
|
+
it 'should return an array with a Bliss::Constraint object' do
|
19
|
+
constraints = Bliss::Format.settings_to_constraints(['root'], {'tag_name_required' => true})
|
20
|
+
constraints.should be_a(Array)
|
21
|
+
constraints.size.should == 1
|
22
|
+
constraints.first.should be_a(Bliss::Constraint)
|
23
|
+
end
|
52
24
|
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
25
|
+
it 'should have depth and setting loaded' do
|
26
|
+
constraints = Bliss::Format.settings_to_constraints(['root'], {'tag_name_required' => true})
|
27
|
+
constraints.first.depth.should == 'root'
|
28
|
+
constraints.first.setting.should == :tag_name_required
|
29
|
+
end
|
57
30
|
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
31
|
+
it 'should have multiple depths' do
|
32
|
+
constraints = Bliss::Format.settings_to_constraints(['root'], {'tag_name_required' => true, 'tag_name_values' => ['root', 'ROOT']})
|
33
|
+
constraints.first.depth.should == '(root|ROOT)'
|
34
|
+
constraints.first.setting.should == :tag_name_required
|
35
|
+
end
|
36
|
+
end
|
63
37
|
end
|
data/spec/parser_spec.rb
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
#require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
3
|
+
#require_dependency 'xmlrpc/client'
|
4
|
+
|
5
|
+
describe Bliss::Parser do
|
6
|
+
before do
|
7
|
+
@parser = Bliss::Parser.new('http://www.topdiffusion.com/flux/topdiffusion_adsdeck.xml')
|
8
|
+
@format = Bliss::Format.new
|
9
|
+
@parser.add_format(@format)
|
10
|
+
end
|
11
|
+
|
12
|
+
context 'when parsing a valid document' do
|
13
|
+
before do
|
14
|
+
begin
|
15
|
+
@parser.parse
|
16
|
+
rescue
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
describe '.formats_details' do
|
21
|
+
it 'should have all required keys as existing' do
|
22
|
+
puts @parser.formats_details.inspect
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
data/test.rb
CHANGED
@@ -3,14 +3,17 @@ require 'bliss'
|
|
3
3
|
|
4
4
|
p = Bliss::Parser.new('', 'output.xml')
|
5
5
|
p.wait_tag_close('ad')
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
#}
|
6
|
+
p.on_max_unhandled_bytes(20000) {
|
7
|
+
puts 'Unhandled bytes!'
|
8
|
+
}
|
10
9
|
|
11
10
|
@count = 0
|
12
11
|
@makes = 0
|
13
12
|
|
13
|
+
f = Bliss::Format.new
|
14
|
+
|
15
|
+
p.add_format(f)
|
16
|
+
|
14
17
|
p.on_tag_close('ad') { |hash, depth|
|
15
18
|
if hash.has_key?('make')
|
16
19
|
@makes += 1
|
@@ -45,3 +48,5 @@ end
|
|
45
48
|
|
46
49
|
puts @count
|
47
50
|
puts @makes
|
51
|
+
|
52
|
+
puts p.formats_details.inspect
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bliss
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-06-
|
12
|
+
date: 2012-06-12 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
16
|
-
requirement: &
|
16
|
+
requirement: &8121280 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: 1.5.2
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *8121280
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: eventmachine
|
27
|
-
requirement: &
|
27
|
+
requirement: &8120500 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: 1.0.0.beta.4
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *8120500
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: em-http-request
|
38
|
-
requirement: &
|
38
|
+
requirement: &8119480 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ! '>='
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: 1.0.2
|
44
44
|
type: :runtime
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *8119480
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: rspec
|
49
|
-
requirement: &
|
49
|
+
requirement: &8118780 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ~>
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: 2.8.0
|
55
55
|
type: :development
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *8118780
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: bundler
|
60
|
-
requirement: &
|
60
|
+
requirement: &8117900 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ~>
|
@@ -65,10 +65,10 @@ dependencies:
|
|
65
65
|
version: 1.1.3
|
66
66
|
type: :development
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *8117900
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: jeweler
|
71
|
-
requirement: &
|
71
|
+
requirement: &8132400 !ruby/object:Gem::Requirement
|
72
72
|
none: false
|
73
73
|
requirements:
|
74
74
|
- - ~>
|
@@ -76,10 +76,10 @@ dependencies:
|
|
76
76
|
version: 1.6.4
|
77
77
|
type: :development
|
78
78
|
prerelease: false
|
79
|
-
version_requirements: *
|
79
|
+
version_requirements: *8132400
|
80
80
|
- !ruby/object:Gem::Dependency
|
81
81
|
name: simplecov
|
82
|
-
requirement: &
|
82
|
+
requirement: &8131260 !ruby/object:Gem::Requirement
|
83
83
|
none: false
|
84
84
|
requirements:
|
85
85
|
- - ! '>='
|
@@ -87,7 +87,7 @@ dependencies:
|
|
87
87
|
version: '0'
|
88
88
|
type: :development
|
89
89
|
prerelease: false
|
90
|
-
version_requirements: *
|
90
|
+
version_requirements: *8131260
|
91
91
|
description: streamed xml parsing tool
|
92
92
|
email: krakatoa1987@gmail.com
|
93
93
|
executables: []
|
@@ -117,7 +117,9 @@ files:
|
|
117
117
|
- lib/bliss/parser_machine.rb
|
118
118
|
- lib/hash_extension.rb
|
119
119
|
- spec.yml
|
120
|
+
- spec/constraint_spec.rb
|
120
121
|
- spec/format_spec.rb
|
122
|
+
- spec/parser_spec.rb
|
121
123
|
- spec/spec_helper.rb
|
122
124
|
- test.rb
|
123
125
|
- test/helper.rb
|
@@ -137,7 +139,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
137
139
|
version: '0'
|
138
140
|
segments:
|
139
141
|
- 0
|
140
|
-
hash:
|
142
|
+
hash: -4543548141741406741
|
141
143
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
142
144
|
none: false
|
143
145
|
requirements:
|