bliss 0.0.5 → 0.0.6
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +3 -3
- data/Gemfile.lock +11 -7
- data/Rakefile +1 -0
- data/VERSION +1 -1
- data/bliss.gemspec +11 -11
- data/lib/bliss/parser_machine.rb +86 -20
- metadata +20 -20
data/Gemfile
CHANGED
@@ -3,9 +3,9 @@ source "http://rubygems.org"
|
|
3
3
|
# Example:
|
4
4
|
# gem "activesupport", ">= 2.3.5"
|
5
5
|
|
6
|
-
gem "nokogiri"
|
7
|
-
gem "eventmachine", ">= 0.
|
8
|
-
gem "em-http-request"
|
6
|
+
gem "nokogiri", ">= 1.5.2"
|
7
|
+
gem "eventmachine", ">= 1.0.0.beta.4"
|
8
|
+
gem "em-http-request", ">= 1.0.2"
|
9
9
|
|
10
10
|
# Add dependencies to develop your gem here.
|
11
11
|
# Include everything needed to run rake, tests, features, etc.
|
data/Gemfile.lock
CHANGED
@@ -3,7 +3,7 @@ GEM
|
|
3
3
|
specs:
|
4
4
|
addressable (2.2.7)
|
5
5
|
cookiejar (0.3.0)
|
6
|
-
em-http-request (1.0.
|
6
|
+
em-http-request (1.0.2)
|
7
7
|
addressable (>= 2.2.3)
|
8
8
|
cookiejar
|
9
9
|
em-socksify
|
@@ -19,10 +19,14 @@ GEM
|
|
19
19
|
git (>= 1.2.5)
|
20
20
|
rake
|
21
21
|
multi_json (1.1.0)
|
22
|
-
nokogiri (1.5.
|
22
|
+
nokogiri (1.5.2)
|
23
23
|
rake (0.9.2.2)
|
24
|
-
shoulda (
|
25
|
-
|
24
|
+
shoulda (3.0.1)
|
25
|
+
shoulda-context (~> 1.0.0)
|
26
|
+
shoulda-matchers (~> 1.0.0)
|
27
|
+
shoulda-context (1.0.0)
|
28
|
+
shoulda-matchers (1.0.0)
|
29
|
+
simplecov (0.6.1)
|
26
30
|
multi_json (~> 1.0)
|
27
31
|
simplecov-html (~> 0.5.3)
|
28
32
|
simplecov-html (0.5.3)
|
@@ -32,9 +36,9 @@ PLATFORMS
|
|
32
36
|
|
33
37
|
DEPENDENCIES
|
34
38
|
bundler (~> 1.0.0)
|
35
|
-
em-http-request
|
36
|
-
eventmachine (>= 0.
|
39
|
+
em-http-request (>= 1.0.2)
|
40
|
+
eventmachine (>= 1.0.0.beta.4)
|
37
41
|
jeweler (~> 1.6.4)
|
38
|
-
nokogiri
|
42
|
+
nokogiri (>= 1.5.2)
|
39
43
|
shoulda
|
40
44
|
simplecov
|
data/Rakefile
CHANGED
@@ -21,6 +21,7 @@ Jeweler::Tasks.new do |gem|
|
|
21
21
|
gem.description = %Q{streamed xml parsing tool}
|
22
22
|
gem.email = "krakatoa1987@gmail.com"
|
23
23
|
gem.authors = ["Fernando Alonso"]
|
24
|
+
gem.require_paths = ["lib"]
|
24
25
|
# dependencies defined in Gemfile
|
25
26
|
end
|
26
27
|
Jeweler::RubygemsDotOrgTasks.new
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.
|
1
|
+
0.0.6
|
data/bliss.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "bliss"
|
8
|
-
s.version = "0.0.
|
8
|
+
s.version = "0.0.6"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Fernando Alonso"]
|
12
|
-
s.date = "2012-03-
|
12
|
+
s.date = "2012-03-21"
|
13
13
|
s.description = "streamed xml parsing tool"
|
14
14
|
s.email = "krakatoa1987@gmail.com"
|
15
15
|
s.extra_rdoc_files = [
|
@@ -46,26 +46,26 @@ Gem::Specification.new do |s|
|
|
46
46
|
s.specification_version = 3
|
47
47
|
|
48
48
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
49
|
-
s.add_runtime_dependency(%q<nokogiri>, [">=
|
50
|
-
s.add_runtime_dependency(%q<eventmachine>, [">= 0.
|
51
|
-
s.add_runtime_dependency(%q<em-http-request>, [">= 0"])
|
49
|
+
s.add_runtime_dependency(%q<nokogiri>, [">= 1.5.2"])
|
50
|
+
s.add_runtime_dependency(%q<eventmachine>, [">= 1.0.0.beta.4"])
|
51
|
+
s.add_runtime_dependency(%q<em-http-request>, [">= 1.0.2"])
|
52
52
|
s.add_development_dependency(%q<shoulda>, [">= 0"])
|
53
53
|
s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
|
54
54
|
s.add_development_dependency(%q<jeweler>, ["~> 1.6.4"])
|
55
55
|
s.add_development_dependency(%q<simplecov>, [">= 0"])
|
56
56
|
else
|
57
|
-
s.add_dependency(%q<nokogiri>, [">=
|
58
|
-
s.add_dependency(%q<eventmachine>, [">= 0.
|
59
|
-
s.add_dependency(%q<em-http-request>, [">= 0"])
|
57
|
+
s.add_dependency(%q<nokogiri>, [">= 1.5.2"])
|
58
|
+
s.add_dependency(%q<eventmachine>, [">= 1.0.0.beta.4"])
|
59
|
+
s.add_dependency(%q<em-http-request>, [">= 1.0.2"])
|
60
60
|
s.add_dependency(%q<shoulda>, [">= 0"])
|
61
61
|
s.add_dependency(%q<bundler>, ["~> 1.0.0"])
|
62
62
|
s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
|
63
63
|
s.add_dependency(%q<simplecov>, [">= 0"])
|
64
64
|
end
|
65
65
|
else
|
66
|
-
s.add_dependency(%q<nokogiri>, [">=
|
67
|
-
s.add_dependency(%q<eventmachine>, [">= 0.
|
68
|
-
s.add_dependency(%q<em-http-request>, [">= 0"])
|
66
|
+
s.add_dependency(%q<nokogiri>, [">= 1.5.2"])
|
67
|
+
s.add_dependency(%q<eventmachine>, [">= 1.0.0.beta.4"])
|
68
|
+
s.add_dependency(%q<em-http-request>, [">= 1.0.2"])
|
69
69
|
s.add_dependency(%q<shoulda>, [">= 0"])
|
70
70
|
s.add_dependency(%q<bundler>, ["~> 1.0.0"])
|
71
71
|
s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
|
data/lib/bliss/parser_machine.rb
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
module Bliss
|
2
2
|
class ParserMachine
|
3
|
+
attr_writer :max_unhandled_bytes
|
4
|
+
|
3
5
|
def initialize(path, filepath=nil)
|
4
6
|
@path = path
|
5
7
|
|
@@ -27,11 +29,46 @@ module Bliss
|
|
27
29
|
|
28
30
|
def on_tag_open(element, &block)
|
29
31
|
return false if block.arity != 1
|
30
|
-
|
32
|
+
|
33
|
+
overriden_block = Proc.new { |depth|
|
34
|
+
reset_unhandled_bytes
|
35
|
+
block.call(depth)
|
36
|
+
}
|
37
|
+
@sax_parser.on_tag_open(element, overriden_block)
|
31
38
|
end
|
32
39
|
|
33
40
|
def on_tag_close(element, &block)
|
34
|
-
|
41
|
+
overriden_block = Proc.new { |hash|
|
42
|
+
reset_unhandled_bytes
|
43
|
+
block.call(hash)
|
44
|
+
}
|
45
|
+
@sax_parser.on_tag_close(element, overriden_block)
|
46
|
+
end
|
47
|
+
|
48
|
+
def wait_tag_close(element)
|
49
|
+
@wait_tag_close = "</#{element}>"
|
50
|
+
end
|
51
|
+
|
52
|
+
def reset_unhandled_bytes
|
53
|
+
return false if not check_unhandled_bytes?
|
54
|
+
@unhandled_bytes = 0
|
55
|
+
end
|
56
|
+
|
57
|
+
def check_unhandled_bytes
|
58
|
+
if @unhandled_bytes > @max_unhandled_bytes
|
59
|
+
self.close
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
def exceeded?
|
64
|
+
return false if not check_unhandled_bytes?
|
65
|
+
if @unhandled_bytes > @max_unhandled_bytes
|
66
|
+
return true
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
def check_unhandled_bytes?
|
71
|
+
@max_unhandled_bytes ? true : false
|
35
72
|
end
|
36
73
|
|
37
74
|
def root
|
@@ -43,33 +80,39 @@ module Bliss
|
|
43
80
|
end
|
44
81
|
|
45
82
|
def parse
|
46
|
-
|
83
|
+
reset_unhandled_bytes if check_unhandled_bytes?
|
47
84
|
|
48
85
|
EM.run do
|
49
86
|
http = EM::HttpRequest.new(@path).get
|
50
87
|
http.stream { |chunk|
|
51
|
-
chunk
|
88
|
+
if chunk
|
89
|
+
chunk.force_encoding('UTF-8')
|
52
90
|
|
53
|
-
|
91
|
+
@parser << chunk
|
54
92
|
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
if @file
|
59
|
-
@file << chunk
|
93
|
+
if check_unhandled_bytes?
|
94
|
+
@unhandled_bytes += chunk.length
|
95
|
+
check_unhandled_bytes
|
60
96
|
end
|
61
|
-
|
62
|
-
if @
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
@
|
97
|
+
|
98
|
+
if not @sax_parser.is_closed?
|
99
|
+
if @file
|
100
|
+
@file << chunk
|
101
|
+
end
|
102
|
+
else
|
103
|
+
if @file and not exceeded? and @wait_tag_close
|
104
|
+
handle_wait_tag_close(chunk) #if @wait_tag_close
|
105
|
+
else
|
106
|
+
begin
|
107
|
+
if @file
|
108
|
+
@file.close
|
109
|
+
end
|
110
|
+
ensure
|
111
|
+
EM.stop
|
112
|
+
end
|
69
113
|
end
|
70
|
-
end
|
71
114
|
|
72
|
-
|
115
|
+
end
|
73
116
|
end
|
74
117
|
}
|
75
118
|
http.callback {
|
@@ -80,6 +123,29 @@ module Bliss
|
|
80
123
|
}
|
81
124
|
end
|
82
125
|
end
|
126
|
+
|
127
|
+
def handle_wait_tag_close(chunk)
|
128
|
+
begin
|
129
|
+
last_index = chunk.index(@wait_tag_close)
|
130
|
+
if last_index
|
131
|
+
last_index += 4
|
132
|
+
@file << chunk[0..last_index]
|
133
|
+
@file << "</#{self.root}>" # TODO set this by using actual depth, so all tags get closed
|
134
|
+
@file.close
|
135
|
+
EM.stop
|
136
|
+
else
|
137
|
+
@file << chunk
|
138
|
+
end
|
139
|
+
rescue
|
140
|
+
begin
|
141
|
+
@file.close
|
142
|
+
rescue
|
143
|
+
ensure
|
144
|
+
EM.stop
|
145
|
+
end
|
146
|
+
end
|
147
|
+
end
|
148
|
+
|
83
149
|
end
|
84
150
|
end
|
85
151
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bliss
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.6
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,44 +9,44 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-03-
|
12
|
+
date: 2012-03-21 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
16
|
-
requirement: &
|
16
|
+
requirement: &16081520 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
20
20
|
- !ruby/object:Gem::Version
|
21
|
-
version:
|
21
|
+
version: 1.5.2
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *16081520
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: eventmachine
|
27
|
-
requirement: &
|
27
|
+
requirement: &16079400 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
31
31
|
- !ruby/object:Gem::Version
|
32
|
-
version:
|
32
|
+
version: 1.0.0.beta.4
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *16079400
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: em-http-request
|
38
|
-
requirement: &
|
38
|
+
requirement: &16076620 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ! '>='
|
42
42
|
- !ruby/object:Gem::Version
|
43
|
-
version:
|
43
|
+
version: 1.0.2
|
44
44
|
type: :runtime
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *16076620
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: shoulda
|
49
|
-
requirement: &
|
49
|
+
requirement: &16074940 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ! '>='
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: '0'
|
55
55
|
type: :development
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *16074940
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: bundler
|
60
|
-
requirement: &
|
60
|
+
requirement: &16088940 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ~>
|
@@ -65,10 +65,10 @@ dependencies:
|
|
65
65
|
version: 1.0.0
|
66
66
|
type: :development
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *16088940
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: jeweler
|
71
|
-
requirement: &
|
71
|
+
requirement: &16083840 !ruby/object:Gem::Requirement
|
72
72
|
none: false
|
73
73
|
requirements:
|
74
74
|
- - ~>
|
@@ -76,10 +76,10 @@ dependencies:
|
|
76
76
|
version: 1.6.4
|
77
77
|
type: :development
|
78
78
|
prerelease: false
|
79
|
-
version_requirements: *
|
79
|
+
version_requirements: *16083840
|
80
80
|
- !ruby/object:Gem::Dependency
|
81
81
|
name: simplecov
|
82
|
-
requirement: &
|
82
|
+
requirement: &16094900 !ruby/object:Gem::Requirement
|
83
83
|
none: false
|
84
84
|
requirements:
|
85
85
|
- - ! '>='
|
@@ -87,7 +87,7 @@ dependencies:
|
|
87
87
|
version: '0'
|
88
88
|
type: :development
|
89
89
|
prerelease: false
|
90
|
-
version_requirements: *
|
90
|
+
version_requirements: *16094900
|
91
91
|
description: streamed xml parsing tool
|
92
92
|
email: krakatoa1987@gmail.com
|
93
93
|
executables: []
|
@@ -129,7 +129,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
129
129
|
version: '0'
|
130
130
|
segments:
|
131
131
|
- 0
|
132
|
-
hash: -
|
132
|
+
hash: -3872124772469338070
|
133
133
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
134
134
|
none: false
|
135
135
|
requirements:
|