bliss 0.0.5 → 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +3 -3
- data/Gemfile.lock +11 -7
- data/Rakefile +1 -0
- data/VERSION +1 -1
- data/bliss.gemspec +11 -11
- data/lib/bliss/parser_machine.rb +86 -20
- metadata +20 -20
data/Gemfile
CHANGED
@@ -3,9 +3,9 @@ source "http://rubygems.org"
|
|
3
3
|
# Example:
|
4
4
|
# gem "activesupport", ">= 2.3.5"
|
5
5
|
|
6
|
-
gem "nokogiri"
|
7
|
-
gem "eventmachine", ">= 0.
|
8
|
-
gem "em-http-request"
|
6
|
+
gem "nokogiri", ">= 1.5.2"
|
7
|
+
gem "eventmachine", ">= 1.0.0.beta.4"
|
8
|
+
gem "em-http-request", ">= 1.0.2"
|
9
9
|
|
10
10
|
# Add dependencies to develop your gem here.
|
11
11
|
# Include everything needed to run rake, tests, features, etc.
|
data/Gemfile.lock
CHANGED
@@ -3,7 +3,7 @@ GEM
|
|
3
3
|
specs:
|
4
4
|
addressable (2.2.7)
|
5
5
|
cookiejar (0.3.0)
|
6
|
-
em-http-request (1.0.
|
6
|
+
em-http-request (1.0.2)
|
7
7
|
addressable (>= 2.2.3)
|
8
8
|
cookiejar
|
9
9
|
em-socksify
|
@@ -19,10 +19,14 @@ GEM
|
|
19
19
|
git (>= 1.2.5)
|
20
20
|
rake
|
21
21
|
multi_json (1.1.0)
|
22
|
-
nokogiri (1.5.
|
22
|
+
nokogiri (1.5.2)
|
23
23
|
rake (0.9.2.2)
|
24
|
-
shoulda (
|
25
|
-
|
24
|
+
shoulda (3.0.1)
|
25
|
+
shoulda-context (~> 1.0.0)
|
26
|
+
shoulda-matchers (~> 1.0.0)
|
27
|
+
shoulda-context (1.0.0)
|
28
|
+
shoulda-matchers (1.0.0)
|
29
|
+
simplecov (0.6.1)
|
26
30
|
multi_json (~> 1.0)
|
27
31
|
simplecov-html (~> 0.5.3)
|
28
32
|
simplecov-html (0.5.3)
|
@@ -32,9 +36,9 @@ PLATFORMS
|
|
32
36
|
|
33
37
|
DEPENDENCIES
|
34
38
|
bundler (~> 1.0.0)
|
35
|
-
em-http-request
|
36
|
-
eventmachine (>= 0.
|
39
|
+
em-http-request (>= 1.0.2)
|
40
|
+
eventmachine (>= 1.0.0.beta.4)
|
37
41
|
jeweler (~> 1.6.4)
|
38
|
-
nokogiri
|
42
|
+
nokogiri (>= 1.5.2)
|
39
43
|
shoulda
|
40
44
|
simplecov
|
data/Rakefile
CHANGED
@@ -21,6 +21,7 @@ Jeweler::Tasks.new do |gem|
|
|
21
21
|
gem.description = %Q{streamed xml parsing tool}
|
22
22
|
gem.email = "krakatoa1987@gmail.com"
|
23
23
|
gem.authors = ["Fernando Alonso"]
|
24
|
+
gem.require_paths = ["lib"]
|
24
25
|
# dependencies defined in Gemfile
|
25
26
|
end
|
26
27
|
Jeweler::RubygemsDotOrgTasks.new
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.5
|
1
|
+
0.0.6
|
data/bliss.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "bliss"
|
8
|
-
s.version = "0.0.5"
|
8
|
+
s.version = "0.0.6"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Fernando Alonso"]
|
12
|
-
s.date = "2012-03-
|
12
|
+
s.date = "2012-03-21"
|
13
13
|
s.description = "streamed xml parsing tool"
|
14
14
|
s.email = "krakatoa1987@gmail.com"
|
15
15
|
s.extra_rdoc_files = [
|
@@ -46,26 +46,26 @@ Gem::Specification.new do |s|
|
|
46
46
|
s.specification_version = 3
|
47
47
|
|
48
48
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
49
|
-
s.add_runtime_dependency(%q<nokogiri>, [">= 0"])
|
50
|
-
s.add_runtime_dependency(%q<eventmachine>, [">= 0.
|
51
|
-
s.add_runtime_dependency(%q<em-http-request>, [">= 0"])
|
49
|
+
s.add_runtime_dependency(%q<nokogiri>, [">= 1.5.2"])
|
50
|
+
s.add_runtime_dependency(%q<eventmachine>, [">= 1.0.0.beta.4"])
|
51
|
+
s.add_runtime_dependency(%q<em-http-request>, [">= 1.0.2"])
|
52
52
|
s.add_development_dependency(%q<shoulda>, [">= 0"])
|
53
53
|
s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
|
54
54
|
s.add_development_dependency(%q<jeweler>, ["~> 1.6.4"])
|
55
55
|
s.add_development_dependency(%q<simplecov>, [">= 0"])
|
56
56
|
else
|
57
|
-
s.add_dependency(%q<nokogiri>, [">= 0"])
|
58
|
-
s.add_dependency(%q<eventmachine>, [">= 0.
|
59
|
-
s.add_dependency(%q<em-http-request>, [">= 0"])
|
57
|
+
s.add_dependency(%q<nokogiri>, [">= 1.5.2"])
|
58
|
+
s.add_dependency(%q<eventmachine>, [">= 1.0.0.beta.4"])
|
59
|
+
s.add_dependency(%q<em-http-request>, [">= 1.0.2"])
|
60
60
|
s.add_dependency(%q<shoulda>, [">= 0"])
|
61
61
|
s.add_dependency(%q<bundler>, ["~> 1.0.0"])
|
62
62
|
s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
|
63
63
|
s.add_dependency(%q<simplecov>, [">= 0"])
|
64
64
|
end
|
65
65
|
else
|
66
|
-
s.add_dependency(%q<nokogiri>, [">= 0"])
|
67
|
-
s.add_dependency(%q<eventmachine>, [">= 0.
|
68
|
-
s.add_dependency(%q<em-http-request>, [">= 0"])
|
66
|
+
s.add_dependency(%q<nokogiri>, [">= 1.5.2"])
|
67
|
+
s.add_dependency(%q<eventmachine>, [">= 1.0.0.beta.4"])
|
68
|
+
s.add_dependency(%q<em-http-request>, [">= 1.0.2"])
|
69
69
|
s.add_dependency(%q<shoulda>, [">= 0"])
|
70
70
|
s.add_dependency(%q<bundler>, ["~> 1.0.0"])
|
71
71
|
s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
|
data/lib/bliss/parser_machine.rb
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
module Bliss
|
2
2
|
class ParserMachine
|
3
|
+
attr_writer :max_unhandled_bytes
|
4
|
+
|
3
5
|
def initialize(path, filepath=nil)
|
4
6
|
@path = path
|
5
7
|
|
@@ -27,11 +29,46 @@ module Bliss
|
|
27
29
|
|
28
30
|
def on_tag_open(element, &block)
|
29
31
|
return false if block.arity != 1
|
30
|
-
|
32
|
+
|
33
|
+
overriden_block = Proc.new { |depth|
|
34
|
+
reset_unhandled_bytes
|
35
|
+
block.call(depth)
|
36
|
+
}
|
37
|
+
@sax_parser.on_tag_open(element, overriden_block)
|
31
38
|
end
|
32
39
|
|
33
40
|
def on_tag_close(element, &block)
|
34
|
-
|
41
|
+
overriden_block = Proc.new { |hash|
|
42
|
+
reset_unhandled_bytes
|
43
|
+
block.call(hash)
|
44
|
+
}
|
45
|
+
@sax_parser.on_tag_close(element, overriden_block)
|
46
|
+
end
|
47
|
+
|
48
|
+
def wait_tag_close(element)
|
49
|
+
@wait_tag_close = "</#{element}>"
|
50
|
+
end
|
51
|
+
|
52
|
+
def reset_unhandled_bytes
|
53
|
+
return false if not check_unhandled_bytes?
|
54
|
+
@unhandled_bytes = 0
|
55
|
+
end
|
56
|
+
|
57
|
+
def check_unhandled_bytes
|
58
|
+
if @unhandled_bytes > @max_unhandled_bytes
|
59
|
+
self.close
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
def exceeded?
|
64
|
+
return false if not check_unhandled_bytes?
|
65
|
+
if @unhandled_bytes > @max_unhandled_bytes
|
66
|
+
return true
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
def check_unhandled_bytes?
|
71
|
+
@max_unhandled_bytes ? true : false
|
35
72
|
end
|
36
73
|
|
37
74
|
def root
|
@@ -43,33 +80,39 @@ module Bliss
|
|
43
80
|
end
|
44
81
|
|
45
82
|
def parse
|
46
|
-
|
83
|
+
reset_unhandled_bytes if check_unhandled_bytes?
|
47
84
|
|
48
85
|
EM.run do
|
49
86
|
http = EM::HttpRequest.new(@path).get
|
50
87
|
http.stream { |chunk|
|
51
|
-
chunk
|
88
|
+
if chunk
|
89
|
+
chunk.force_encoding('UTF-8')
|
52
90
|
|
53
|
-
|
91
|
+
@parser << chunk
|
54
92
|
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
if @file
|
59
|
-
@file << chunk
|
93
|
+
if check_unhandled_bytes?
|
94
|
+
@unhandled_bytes += chunk.length
|
95
|
+
check_unhandled_bytes
|
60
96
|
end
|
61
|
-
|
62
|
-
if @
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
@
|
97
|
+
|
98
|
+
if not @sax_parser.is_closed?
|
99
|
+
if @file
|
100
|
+
@file << chunk
|
101
|
+
end
|
102
|
+
else
|
103
|
+
if @file and not exceeded? and @wait_tag_close
|
104
|
+
handle_wait_tag_close(chunk) #if @wait_tag_close
|
105
|
+
else
|
106
|
+
begin
|
107
|
+
if @file
|
108
|
+
@file.close
|
109
|
+
end
|
110
|
+
ensure
|
111
|
+
EM.stop
|
112
|
+
end
|
69
113
|
end
|
70
|
-
end
|
71
114
|
|
72
|
-
|
115
|
+
end
|
73
116
|
end
|
74
117
|
}
|
75
118
|
http.callback {
|
@@ -80,6 +123,29 @@ module Bliss
|
|
80
123
|
}
|
81
124
|
end
|
82
125
|
end
|
126
|
+
|
127
|
+
def handle_wait_tag_close(chunk)
|
128
|
+
begin
|
129
|
+
last_index = chunk.index(@wait_tag_close)
|
130
|
+
if last_index
|
131
|
+
last_index += 4
|
132
|
+
@file << chunk[0..last_index]
|
133
|
+
@file << "</#{self.root}>" # TODO set this by using actual depth, so all tags get closed
|
134
|
+
@file.close
|
135
|
+
EM.stop
|
136
|
+
else
|
137
|
+
@file << chunk
|
138
|
+
end
|
139
|
+
rescue
|
140
|
+
begin
|
141
|
+
@file.close
|
142
|
+
rescue
|
143
|
+
ensure
|
144
|
+
EM.stop
|
145
|
+
end
|
146
|
+
end
|
147
|
+
end
|
148
|
+
|
83
149
|
end
|
84
150
|
end
|
85
151
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bliss
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.5
|
4
|
+
version: 0.0.6
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,44 +9,44 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-03-
|
12
|
+
date: 2012-03-21 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
16
|
-
requirement: &
|
16
|
+
requirement: &16081520 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
20
20
|
- !ruby/object:Gem::Version
|
21
|
-
version:
|
21
|
+
version: 1.5.2
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *16081520
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: eventmachine
|
27
|
-
requirement: &
|
27
|
+
requirement: &16079400 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
31
31
|
- !ruby/object:Gem::Version
|
32
|
-
version:
|
32
|
+
version: 1.0.0.beta.4
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *16079400
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: em-http-request
|
38
|
-
requirement: &
|
38
|
+
requirement: &16076620 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ! '>='
|
42
42
|
- !ruby/object:Gem::Version
|
43
|
-
version:
|
43
|
+
version: 1.0.2
|
44
44
|
type: :runtime
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *16076620
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: shoulda
|
49
|
-
requirement: &
|
49
|
+
requirement: &16074940 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ! '>='
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: '0'
|
55
55
|
type: :development
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *16074940
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: bundler
|
60
|
-
requirement: &
|
60
|
+
requirement: &16088940 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ~>
|
@@ -65,10 +65,10 @@ dependencies:
|
|
65
65
|
version: 1.0.0
|
66
66
|
type: :development
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *16088940
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: jeweler
|
71
|
-
requirement: &
|
71
|
+
requirement: &16083840 !ruby/object:Gem::Requirement
|
72
72
|
none: false
|
73
73
|
requirements:
|
74
74
|
- - ~>
|
@@ -76,10 +76,10 @@ dependencies:
|
|
76
76
|
version: 1.6.4
|
77
77
|
type: :development
|
78
78
|
prerelease: false
|
79
|
-
version_requirements: *
|
79
|
+
version_requirements: *16083840
|
80
80
|
- !ruby/object:Gem::Dependency
|
81
81
|
name: simplecov
|
82
|
-
requirement: &
|
82
|
+
requirement: &16094900 !ruby/object:Gem::Requirement
|
83
83
|
none: false
|
84
84
|
requirements:
|
85
85
|
- - ! '>='
|
@@ -87,7 +87,7 @@ dependencies:
|
|
87
87
|
version: '0'
|
88
88
|
type: :development
|
89
89
|
prerelease: false
|
90
|
-
version_requirements: *
|
90
|
+
version_requirements: *16094900
|
91
91
|
description: streamed xml parsing tool
|
92
92
|
email: krakatoa1987@gmail.com
|
93
93
|
executables: []
|
@@ -129,7 +129,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
129
129
|
version: '0'
|
130
130
|
segments:
|
131
131
|
- 0
|
132
|
-
hash: -
|
132
|
+
hash: -3872124772469338070
|
133
133
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
134
134
|
none: false
|
135
135
|
requirements:
|