bliss 0.0.7 → 0.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.rdoc +12 -0
- data/Gemfile +2 -2
- data/Gemfile.lock +11 -7
- data/Rakefile +7 -1
- data/VERSION +1 -1
- data/bliss.gemspec +18 -10
- data/complete_test.rb +74 -0
- data/gzip_support.rb +41 -0
- data/http-machine.rb +2 -3
- data/lib/bliss.rb +5 -1
- data/lib/bliss/constraint.rb +78 -0
- data/lib/bliss/encoding_error.rb +4 -0
- data/lib/bliss/format.rb +97 -0
- data/lib/bliss/parser.rb +208 -3
- data/lib/bliss/parser_machine.rb +93 -123
- data/lib/hash_extension.rb +16 -0
- data/spec.yml +23 -0
- data/spec/format_spec.rb +63 -0
- data/spec/spec_helper.rb +16 -0
- data/test.rb +44 -2
- metadata +31 -23
- data/lib/bliss/sax_parser.rb +0 -122
data/lib/hash_extension.rb
CHANGED
@@ -20,4 +20,20 @@ class Hash
|
|
20
20
|
chain.pop
|
21
21
|
return self.value_at_chain(chain)
|
22
22
|
end
|
23
|
+
|
24
|
+
def recurse(include_root=false, depth=[], &block)
|
25
|
+
self.each_pair { |k,v|
|
26
|
+
if v.is_a? Hash
|
27
|
+
if include_root
|
28
|
+
block.call(depth + [k], v)
|
29
|
+
end
|
30
|
+
depth.push k
|
31
|
+
v.recurse(include_root, depth, &block)
|
32
|
+
else
|
33
|
+
block.call(depth + [k], v)
|
34
|
+
#return "#{depth + [k]}: #{v.inspect}"
|
35
|
+
end
|
36
|
+
}
|
37
|
+
depth.pop
|
38
|
+
end
|
23
39
|
end
|
data/spec.yml
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# TODO content_type = url
|
2
|
+
---
|
3
|
+
root:
|
4
|
+
ads:
|
5
|
+
tag_name_values: [ ads, jobs, job ]
|
6
|
+
ad:
|
7
|
+
id:
|
8
|
+
content_type: numeric
|
9
|
+
description:
|
10
|
+
tag_name_values: [ description, content ]
|
11
|
+
content_type: string
|
12
|
+
pictures:
|
13
|
+
tag_name_required: false
|
14
|
+
picture:
|
15
|
+
tag_name_required: false
|
16
|
+
url:
|
17
|
+
tag_name_required: true
|
18
|
+
content_type: string
|
19
|
+
content_format: /http:\/\/\w+/
|
20
|
+
url:
|
21
|
+
content_format: /http:\/\/\w+/
|
22
|
+
date:
|
23
|
+
content_type: date
|
data/spec/format_spec.rb
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
#require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
3
|
+
#require_dependency 'xmlrpc/client'
|
4
|
+
|
5
|
+
describe Bliss::Format do
|
6
|
+
before do
|
7
|
+
#@openx_banner = mock(OpenX::Services::Banner)
|
8
|
+
@format = Bliss::Format.new
|
9
|
+
end
|
10
|
+
|
11
|
+
describe '.constraints' do
|
12
|
+
#before do
|
13
|
+
#end
|
14
|
+
|
15
|
+
it 'should do it' do
|
16
|
+
@format.constraints.size.should == 8
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
#describe '.traffic' do
|
21
|
+
# before do
|
22
|
+
# @openx_banner.stub(:statistics) { YAML.load_file('spec/fixtures/openx_banner_statistics.yml') }
|
23
|
+
# OpenX::Services::Banner.should_receive(:find).with(1).and_return(@openx_banner)
|
24
|
+
# @banner = Banner.new(1)
|
25
|
+
# end
|
26
|
+
|
27
|
+
# it 'should return statistics' do
|
28
|
+
# @banner.traffic(Date.today, Date.today).should be_kind_of(Hash)
|
29
|
+
# end
|
30
|
+
#end
|
31
|
+
|
32
|
+
# describe '.created' do
|
33
|
+
# context 'when last creation is less than 2 days ago' do
|
34
|
+
# before do
|
35
|
+
# @site.stub(:last_creation_day_in_week) { Date.today - 1 }
|
36
|
+
# end
|
37
|
+
|
38
|
+
# it 'should be ok' do
|
39
|
+
# @site_evaluation.created[@site.id]['created'][0].should == 'ok'
|
40
|
+
# end
|
41
|
+
# end
|
42
|
+
|
43
|
+
# context 'when last creation is between 2 and 7 days ago' do
|
44
|
+
# before do
|
45
|
+
# @site.stub(:last_creation_day_in_week) { Date.today - 3 }
|
46
|
+
# end
|
47
|
+
|
48
|
+
# it 'should be a warning' do
|
49
|
+
# @site_evaluation.created[@site.id]['created'][0].should == 'warning'
|
50
|
+
# end
|
51
|
+
# end
|
52
|
+
|
53
|
+
# context 'when last creation is more than 7 days ago' do
|
54
|
+
# before do
|
55
|
+
# @site.stub(:last_creation_day_in_week) { Date.today - 8 }
|
56
|
+
# end
|
57
|
+
|
58
|
+
# it 'should be an alert' do
|
59
|
+
# @site_evaluation.created[@site.id]['created'][0].should == 'alert'
|
60
|
+
# end
|
61
|
+
# end
|
62
|
+
# end
|
63
|
+
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
2
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
3
|
+
require 'rspec'
|
4
|
+
|
5
|
+
require 'bundler'
|
6
|
+
Bundler.require(:default, :development)
|
7
|
+
|
8
|
+
Dir["#{File.dirname(__FILE__)}/../lib/**/*.rb"].each {|f| require f}
|
9
|
+
|
10
|
+
# Requires supporting files with custom matchers and macros, etc,
|
11
|
+
# in ./support/ and its subdirectories.
|
12
|
+
Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each {|f| require f}
|
13
|
+
|
14
|
+
RSpec.configure do |config|
|
15
|
+
|
16
|
+
end
|
data/test.rb
CHANGED
@@ -1,5 +1,47 @@
|
|
1
1
|
require 'rubygems'
|
2
2
|
require 'bliss'
|
3
3
|
|
4
|
-
p = Bliss::
|
5
|
-
p.
|
4
|
+
p = Bliss::Parser.new('', 'output.xml')
|
5
|
+
p.wait_tag_close('ad')
|
6
|
+
#p.on_max_unhandled_bytes(20000) {
|
7
|
+
# puts 'Reached Max Unhandled Bytes'
|
8
|
+
# p.close
|
9
|
+
#}
|
10
|
+
|
11
|
+
@count = 0
|
12
|
+
@makes = 0
|
13
|
+
|
14
|
+
p.on_tag_close('ad') { |hash, depth|
|
15
|
+
if hash.has_key?('make')
|
16
|
+
@makes += 1
|
17
|
+
end
|
18
|
+
@count += 1
|
19
|
+
|
20
|
+
if @count == 600
|
21
|
+
p.close
|
22
|
+
end
|
23
|
+
}
|
24
|
+
|
25
|
+
=begin
|
26
|
+
p.on_tag_close('ad') { |hash|
|
27
|
+
count += 1
|
28
|
+
|
29
|
+
dict = {"make"=>"name"}
|
30
|
+
only_in_dict = false
|
31
|
+
hash = hash.inject({}) { |h,v| key = dict.invert[v[0]]; key ||= v[0] unless only_in_dict; h[key] = v[1] if key; h }
|
32
|
+
|
33
|
+
#puts hash.keys.inspect
|
34
|
+
if count == 100
|
35
|
+
p.close
|
36
|
+
end
|
37
|
+
}
|
38
|
+
=end
|
39
|
+
|
40
|
+
begin
|
41
|
+
p.parse
|
42
|
+
rescue Bliss::EncodingError
|
43
|
+
puts "Encoding Error!"
|
44
|
+
end
|
45
|
+
|
46
|
+
puts @count
|
47
|
+
puts @makes
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bliss
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.7
|
4
|
+
version: 0.0.9
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-06-04 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
16
|
-
requirement: &
|
16
|
+
requirement: &16900020 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: 1.5.2
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *16900020
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: eventmachine
|
27
|
-
requirement: &
|
27
|
+
requirement: &16899520 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: 1.0.0.beta.4
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *16899520
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: em-http-request
|
38
|
-
requirement: &
|
38
|
+
requirement: &16898800 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ! '>='
|
@@ -43,32 +43,32 @@ dependencies:
|
|
43
43
|
version: 1.0.2
|
44
44
|
type: :runtime
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *16898800
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
|
-
name:
|
49
|
-
requirement: &
|
48
|
+
name: rspec
|
49
|
+
requirement: &16898260 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
|
-
- -
|
52
|
+
- - ~>
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version:
|
54
|
+
version: 2.8.0
|
55
55
|
type: :development
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *16898260
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: bundler
|
60
|
-
requirement: &
|
60
|
+
requirement: &16897260 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ~>
|
64
64
|
- !ruby/object:Gem::Version
|
65
|
-
version: 1.
|
65
|
+
version: 1.1.3
|
66
66
|
type: :development
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *16897260
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: jeweler
|
71
|
-
requirement: &
|
71
|
+
requirement: &16895760 !ruby/object:Gem::Requirement
|
72
72
|
none: false
|
73
73
|
requirements:
|
74
74
|
- - ~>
|
@@ -76,10 +76,10 @@ dependencies:
|
|
76
76
|
version: 1.6.4
|
77
77
|
type: :development
|
78
78
|
prerelease: false
|
79
|
-
version_requirements: *
|
79
|
+
version_requirements: *16895760
|
80
80
|
- !ruby/object:Gem::Dependency
|
81
81
|
name: simplecov
|
82
|
-
requirement: &
|
82
|
+
requirement: &16911540 !ruby/object:Gem::Requirement
|
83
83
|
none: false
|
84
84
|
requirements:
|
85
85
|
- - ! '>='
|
@@ -87,7 +87,7 @@ dependencies:
|
|
87
87
|
version: '0'
|
88
88
|
type: :development
|
89
89
|
prerelease: false
|
90
|
-
version_requirements: *
|
90
|
+
version_requirements: *16911540
|
91
91
|
description: streamed xml parsing tool
|
92
92
|
email: krakatoa1987@gmail.com
|
93
93
|
executables: []
|
@@ -97,6 +97,7 @@ extra_rdoc_files:
|
|
97
97
|
- README.rdoc
|
98
98
|
files:
|
99
99
|
- .document
|
100
|
+
- CHANGELOG.rdoc
|
100
101
|
- Gemfile
|
101
102
|
- Gemfile.lock
|
102
103
|
- LICENSE.txt
|
@@ -104,13 +105,20 @@ files:
|
|
104
105
|
- Rakefile
|
105
106
|
- VERSION
|
106
107
|
- bliss.gemspec
|
108
|
+
- complete_test.rb
|
109
|
+
- gzip_support.rb
|
107
110
|
- hash.rb
|
108
111
|
- http-machine.rb
|
109
112
|
- lib/bliss.rb
|
113
|
+
- lib/bliss/constraint.rb
|
114
|
+
- lib/bliss/encoding_error.rb
|
115
|
+
- lib/bliss/format.rb
|
110
116
|
- lib/bliss/parser.rb
|
111
117
|
- lib/bliss/parser_machine.rb
|
112
|
-
- lib/bliss/sax_parser.rb
|
113
118
|
- lib/hash_extension.rb
|
119
|
+
- spec.yml
|
120
|
+
- spec/format_spec.rb
|
121
|
+
- spec/spec_helper.rb
|
114
122
|
- test.rb
|
115
123
|
- test/helper.rb
|
116
124
|
- test/test_bliss.rb
|
@@ -129,7 +137,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
129
137
|
version: '0'
|
130
138
|
segments:
|
131
139
|
- 0
|
132
|
-
hash:
|
140
|
+
hash: 3727479071019105598
|
133
141
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
134
142
|
none: false
|
135
143
|
requirements:
|
@@ -138,7 +146,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
138
146
|
version: '0'
|
139
147
|
requirements: []
|
140
148
|
rubyforge_project:
|
141
|
-
rubygems_version: 1.8.
|
149
|
+
rubygems_version: 1.8.10
|
142
150
|
signing_key:
|
143
151
|
specification_version: 3
|
144
152
|
summary: streamed xml parsing tool
|
data/lib/bliss/sax_parser.rb
DELETED
@@ -1,122 +0,0 @@
|
|
1
|
-
module Bliss
|
2
|
-
class SaxParser < Nokogiri::XML::SAX::Document
|
3
|
-
def initialize
|
4
|
-
@depth = []
|
5
|
-
# @settings = {} # downcased
|
6
|
-
|
7
|
-
@root = nil
|
8
|
-
@nodes = {}
|
9
|
-
@current_node = {}
|
10
|
-
|
11
|
-
@on_root = nil
|
12
|
-
@on_tag_open = {}
|
13
|
-
@on_tag_close = {}
|
14
|
-
|
15
|
-
@closed = false
|
16
|
-
|
17
|
-
end
|
18
|
-
|
19
|
-
def on_root(&block)
|
20
|
-
@on_root = block
|
21
|
-
end
|
22
|
-
|
23
|
-
def on_tag_open(element, block)
|
24
|
-
@on_tag_open.merge!({element => block})
|
25
|
-
end
|
26
|
-
|
27
|
-
def on_tag_close(element, block)
|
28
|
-
@on_tag_close.merge!({element => block})
|
29
|
-
end
|
30
|
-
|
31
|
-
def close
|
32
|
-
@closed = true
|
33
|
-
end
|
34
|
-
|
35
|
-
def is_closed?
|
36
|
-
@closed
|
37
|
-
end
|
38
|
-
|
39
|
-
def start_element(element, attributes)
|
40
|
-
# element_transformation
|
41
|
-
|
42
|
-
if @root == nil
|
43
|
-
@root = element
|
44
|
-
if @on_root.is_a? Proc
|
45
|
-
@on_root.call(@root)
|
46
|
-
end
|
47
|
-
end
|
48
|
-
|
49
|
-
@depth.push(element) if @depth.last != element
|
50
|
-
|
51
|
-
if @on_tag_open.has_key? element
|
52
|
-
@on_tag_open[element].call(@depth)
|
53
|
-
end
|
54
|
-
|
55
|
-
current = @nodes.pair_at_chain(@depth)
|
56
|
-
|
57
|
-
value_at = @nodes.value_at_chain(@depth)
|
58
|
-
|
59
|
-
if current.is_a? Hash
|
60
|
-
if value_at.is_a? NilClass
|
61
|
-
current[element] = {}
|
62
|
-
elsif value_at.is_a? Hash
|
63
|
-
if current[element].is_a? Array
|
64
|
-
current[element].concat [{}]
|
65
|
-
else
|
66
|
-
current[element] = [current[element], {}]
|
67
|
-
#current = @nodes.pair_at_chain(@depth)
|
68
|
-
end
|
69
|
-
elsif value_at.is_a? Array
|
70
|
-
#puts @depth.inspect
|
71
|
-
#puts current[element].inspect
|
72
|
-
#puts current[element].inspect
|
73
|
-
end
|
74
|
-
elsif current.is_a? Array
|
75
|
-
end
|
76
|
-
|
77
|
-
@current_content = ''
|
78
|
-
end
|
79
|
-
|
80
|
-
def characters(string)
|
81
|
-
concat_content(string)
|
82
|
-
end
|
83
|
-
|
84
|
-
def cdata_block(string)
|
85
|
-
concat_content(string)
|
86
|
-
end
|
87
|
-
|
88
|
-
def end_element(element, attributes=[])
|
89
|
-
# element_transformation
|
90
|
-
|
91
|
-
current = @nodes.pair_at_chain(@depth)
|
92
|
-
value_at = @nodes.value_at_chain(@depth)
|
93
|
-
|
94
|
-
if value_at.is_a? Hash
|
95
|
-
current[element] = @current_content if @current_content.size > 0
|
96
|
-
elsif value_at.is_a? NilClass
|
97
|
-
if current.is_a? Array
|
98
|
-
current = current.last
|
99
|
-
current[element] = @current_content if @current_content.size > 0
|
100
|
-
end
|
101
|
-
end
|
102
|
-
@current_content = ''
|
103
|
-
|
104
|
-
if @on_tag_close.has_key? element
|
105
|
-
@on_tag_close[element].call(value_at)
|
106
|
-
end
|
107
|
-
|
108
|
-
@depth.pop if @depth.last == element
|
109
|
-
end
|
110
|
-
|
111
|
-
def concat_content(string)
|
112
|
-
string.strip!
|
113
|
-
if string
|
114
|
-
@current_content << string
|
115
|
-
end
|
116
|
-
end
|
117
|
-
|
118
|
-
def end_document
|
119
|
-
puts @nodes.inspect
|
120
|
-
end
|
121
|
-
end
|
122
|
-
end
|