bliss 0.0.7 → 0.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,12 +1,217 @@
1
1
  module Bliss
2
2
  class Parser
3
- def initialize(path)
3
+ def initialize(path, filepath=nil)
4
4
  @path = path
5
- @parser_machine = Bliss::ParserMachine.new(path)
5
+
6
+ @parser_machine = Bliss::ParserMachine.new
7
+
8
+ @push_parser = Nokogiri::XML::SAX::PushParser.new(@parser_machine)
9
+
10
+ if filepath
11
+ @file = File.new(filepath, 'w')
12
+ @file.autoclose = false
13
+ end
14
+
15
+ @root = nil
16
+ @nodes = nil
17
+
18
+ on_root {}
19
+ end
20
+
21
+ def on_root(&block)
22
+ return false if not block.is_a? Proc
23
+ @parser_machine.on_root { |root|
24
+ @root = root
25
+ block.call(root)
26
+ }
27
+ end
28
+
29
+ def on_tag_open(element='default', &block)
30
+ return false if block.arity != 1
31
+
32
+ overriden_block = Proc.new { |depth|
33
+ if not element == 'default'
34
+ reset_unhandled_bytes
35
+ end
36
+ block.call(depth)
37
+ }
38
+ @parser_machine.on_tag_open(element, overriden_block)
39
+ end
40
+
41
+ def on_tag_close(element='default', &block)
42
+ overriden_block = Proc.new { |hash, depth|
43
+ #if not element == 'default'
44
+ reset_unhandled_bytes
45
+ #end
46
+ block.call(hash, depth)
47
+ }
48
+ @parser_machine.on_tag_close(element, overriden_block)
49
+ end
50
+
51
+ def on_max_unhandled_bytes(bytes, &block)
52
+ @max_unhandled_bytes = bytes
53
+ @on_max_unhandled_bytes = block
54
+ end
55
+
56
+ def wait_tag_close(element)
57
+ @wait_tag_close = "</#{element}>"
58
+ end
59
+
60
+ def reset_unhandled_bytes
61
+ return false if not check_unhandled_bytes?
62
+ @unhandled_bytes = 0
63
+ end
64
+
65
+ def check_unhandled_bytes
66
+ if @unhandled_bytes > @max_unhandled_bytes
67
+ if @on_max_unhandled_bytes
68
+ @on_max_unhandled_bytes.call
69
+ @on_max_unhandled_bytes = nil
70
+ end
71
+ #self.close
72
+ end
73
+ end
74
+
75
+ def exceeded?
76
+ return false if not check_unhandled_bytes?
77
+ if @unhandled_bytes > @max_unhandled_bytes
78
+ return true
79
+ end
80
+ end
81
+
82
+ def check_unhandled_bytes?
83
+ @max_unhandled_bytes ? true : false
84
+ end
85
+
86
+ def root
87
+ @root
88
+ end
89
+
90
+ def close
91
+ @parser_machine.close
6
92
  end
7
93
 
8
94
  def parse
9
- @parser_machine.parse
95
+ reset_unhandled_bytes if check_unhandled_bytes?
96
+
97
+ EM.run do
98
+ http = EM::HttpRequest.new(@path).get
99
+
100
+ @autodetect_compression = true
101
+ compression = :none
102
+ if @autodetect_compression
103
+ http.headers do
104
+ if (/^attachment.+filename.+\.gz/i === http.response_header['CONTENT_DISPOSITION']) or http.response_header.compressed? or ["application/octet-stream", "application/x-gzip"].include? http.response_header['CONTENT_TYPE']
105
+ @zstream = Zlib::Inflate.new(Zlib::MAX_WBITS+16)
106
+ compression = :gzip
107
+ end
108
+ end
109
+ end
110
+
111
+ http.stream { |chunk|
112
+ if chunk
113
+ chunk.force_encoding('UTF-8')
114
+
115
+ if check_unhandled_bytes?
116
+ @unhandled_bytes += chunk.length
117
+ check_unhandled_bytes
118
+ end
119
+ if not @parser_machine.is_closed?
120
+ begin
121
+ case compression
122
+ when :gzip
123
+ chunk = @zstream.inflate(chunk)
124
+ chunk.force_encoding('UTF-8')
125
+ end
126
+ @push_parser << chunk
127
+ if @file
128
+ @file << chunk
129
+ end
130
+ rescue Nokogiri::XML::SyntaxError => e
131
+ #puts 'encoding error'
132
+ if e.message.include?("encoding")
133
+ raise Bliss::EncodingError, "Wrong encoding given"
134
+ end
135
+ end
136
+
137
+ else
138
+ if exceeded?
139
+ #puts 'exceeded'
140
+ secure_close
141
+ else
142
+ if @file
143
+ if @wait_tag_close
144
+ #puts 'handle wait'
145
+ handle_wait_tag_close(chunk) #if @wait_tag_close
146
+ else
147
+ #puts 'secure close'
148
+ secure_close
149
+ end
150
+ end
151
+ end
152
+ end
153
+ end
154
+ }
155
+ http.errback {
156
+ #puts 'errback'
157
+ secure_close
158
+ }
159
+ http.callback {
160
+ #if @file
161
+ # @file.close
162
+ #end
163
+ #EM.stop
164
+ secure_close
165
+ }
166
+ end
167
+ file_close
10
168
  end
169
+
170
+ def autodetect_compression(http)
171
+ #compression = :none
172
+ puts compression
173
+ return compression
174
+ end
175
+
176
+ def handle_wait_tag_close(chunk)
177
+ begin
178
+ last_index = chunk.index(@wait_tag_close)
179
+ if last_index
180
+ last_index += 4
181
+ @file << chunk[0..last_index]
182
+ @file << "</#{self.root}>" # TODO set this by using actual depth, so all tags get closed
183
+ secure_close
184
+ else
185
+ @file << chunk
186
+ end
187
+ rescue
188
+ secure_close
189
+ end
190
+ end
191
+
192
+ def file_close
193
+ if @file
194
+ @file.close
195
+ end
196
+ end
197
+
198
+ def secure_close
199
+ begin
200
+ if @zstream
201
+ @zstream.close
202
+ end
203
+ rescue
204
+ ensure
205
+ EM.stop
206
+ #puts "Closed secure."
207
+ end
208
+ end
209
+
11
210
  end
12
211
  end
212
+
213
+ #require 'stringio'
214
+ #str = StringIO.new
215
+ #z = Zlib::GzipWriter.new(str)
216
+ #z.write(txt)
217
+ #z.close
@@ -1,161 +1,131 @@
1
1
  module Bliss
2
- class ParserMachine
3
- attr_writer :max_unhandled_bytes
2
+ class ParserMachine < Nokogiri::XML::SAX::Document
3
+ def initialize
4
+ @depth = []
5
+ # @settings = {} # downcased
4
6
 
5
- def initialize(path, filepath=nil)
6
- @path = path
7
-
8
- @sax_parser = Bliss::SaxParser.new
7
+ @root = nil
8
+ @nodes = {}
9
+ @current_node = {}
9
10
 
10
- @parser = Nokogiri::XML::SAX::PushParser.new(@sax_parser)
11
+ @on_root = nil
11
12
 
12
- if filepath
13
- @file = File.new(filepath, 'w')
14
- end
13
+ @on_tag_open = {}
14
+ @on_tag_close = {}
15
15
 
16
- @root = nil
17
- @nodes = nil
16
+ @closed = false
18
17
 
19
- on_root {}
20
18
  end
21
19
 
22
20
  def on_root(&block)
23
- return false if not block.is_a? Proc
24
- @sax_parser.on_root { |root|
25
- @root = root
26
- block.call(root)
27
- }
21
+ @on_root = block
28
22
  end
29
23
 
30
- def on_tag_open(element, &block)
31
- return false if block.arity != 1
32
-
33
- overriden_block = Proc.new { |depth|
34
- reset_unhandled_bytes
35
- block.call(depth)
36
- }
37
- @sax_parser.on_tag_open(element, overriden_block)
24
+ def on_tag_open(element, block)
25
+ @on_tag_open.merge!({element => block})
38
26
  end
39
27
 
40
- def on_tag_close(element, &block)
41
- overriden_block = Proc.new { |hash|
42
- reset_unhandled_bytes
43
- block.call(hash)
44
- }
45
- @sax_parser.on_tag_close(element, overriden_block)
28
+ def on_tag_close(element, block)
29
+ @on_tag_close.merge!({element => block})
46
30
  end
47
31
 
48
- def wait_tag_close(element)
49
- @wait_tag_close = "</#{element}>"
32
+ def close
33
+ @closed = true
50
34
  end
51
35
 
52
- def reset_unhandled_bytes
53
- return false if not check_unhandled_bytes?
54
- @unhandled_bytes = 0
36
+ def is_closed?
37
+ @closed
55
38
  end
56
39
 
57
- def check_unhandled_bytes
58
- if @unhandled_bytes > @max_unhandled_bytes
59
- self.close
40
+ def start_element(element, attributes)
41
+ return if is_closed?
42
+ # element_transformation
43
+
44
+ if @root == nil
45
+ @root = element
46
+ if @on_root.is_a? Proc
47
+ @on_root.call(@root)
48
+ end
60
49
  end
61
- end
62
50
 
63
- def exceeded?
64
- return false if not check_unhandled_bytes?
65
- if @unhandled_bytes > @max_unhandled_bytes
66
- return true
51
+ @depth.push(element) if @depth.last != element
52
+
53
+ if @on_tag_open.has_key? element
54
+ @on_tag_open[element].call(@depth)
55
+ elsif @on_tag_open.has_key? 'default'
56
+ @on_tag_open['default'].call(@depth)
67
57
  end
68
- end
69
58
 
70
- def check_unhandled_bytes?
71
- @max_unhandled_bytes ? true : false
72
- end
59
+ current = @nodes.pair_at_chain(@depth)
73
60
 
74
- def root
75
- @root
61
+ value_at = @nodes.value_at_chain(@depth)
62
+
63
+ if current.is_a? Hash
64
+ if value_at.is_a? NilClass
65
+ current[element] = {}
66
+ elsif value_at.is_a? Hash
67
+ if current[element].is_a? Array
68
+ current[element].concat [{}]
69
+ else
70
+ current[element] = [current[element], {}]
71
+ #current = @nodes.pair_at_chain(@depth)
72
+ end
73
+ elsif value_at.is_a? Array
74
+ #puts @depth.inspect
75
+ #puts current[element].inspect
76
+ #puts current[element].inspect
77
+ end
78
+ elsif current.is_a? Array
79
+ end
80
+
81
+ @current_content = ''
76
82
  end
77
83
 
78
- def close
79
- @sax_parser.close
84
+ def characters(string)
85
+ return if is_closed?
86
+ concat_content(string)
80
87
  end
81
88
 
82
- def parse
83
- reset_unhandled_bytes if check_unhandled_bytes?
84
-
85
- EM.run do
86
- http = EM::HttpRequest.new(@path).get
87
- http.stream { |chunk|
88
- if chunk
89
- chunk.force_encoding('UTF-8')
90
-
91
- @parser << chunk
92
-
93
- if check_unhandled_bytes?
94
- @unhandled_bytes += chunk.length
95
- check_unhandled_bytes
96
- end
97
-
98
- if not @sax_parser.is_closed?
99
- if @file
100
- @file << chunk
101
- end
102
- else
103
- if exceeded?
104
- #puts 'exceeded'
105
- secure_close
106
- else
107
- if @file
108
- if @wait_tag_close
109
- #puts 'handle wait'
110
- handle_wait_tag_close(chunk) #if @wait_tag_close
111
- else
112
- #puts 'secure close'
113
- secure_close
114
- end
115
- end
116
- end
117
- end
118
- end
119
- }
120
- http.callback {
121
- if @file
122
- @file.close
123
- end
124
- EM.stop
125
- }
126
- end
89
+ def cdata_block(string)
90
+ return if is_closed?
91
+ concat_content(string)
127
92
  end
128
-
129
- def handle_wait_tag_close(chunk)
130
- begin
131
- last_index = chunk.index(@wait_tag_close)
132
- if last_index
133
- last_index += 4
134
- @file << chunk[0..last_index]
135
- @file << "</#{self.root}>" # TODO set this by using actual depth, so all tags get closed
136
- secure_close
137
- else
138
- @file << chunk
93
+
94
+ def end_element(element, attributes=[])
95
+ return if is_closed?
96
+ # element_transformation
97
+
98
+ current = @nodes.pair_at_chain(@depth)
99
+ value_at = @nodes.value_at_chain(@depth)
100
+
101
+ if value_at.is_a? Hash
102
+ current[element] = @current_content if @current_content.size > 0
103
+ elsif value_at.is_a? NilClass
104
+ if current.is_a? Array
105
+ current = current.last
106
+ current[element] = @current_content if @current_content.size > 0
139
107
  end
140
- rescue
141
- secure_close
142
108
  end
109
+ @current_content = ''
110
+
111
+ if @on_tag_close.has_key? element
112
+ @on_tag_close[element].call(value_at, @depth)
113
+ elsif @on_tag_close.has_key? 'default'
114
+ @on_tag_close['default'].call(value_at, @depth)
115
+ end
116
+
117
+ @depth.pop if @depth.last == element
143
118
  end
144
119
 
145
- def secure_close
146
- begin
147
- @file.close
148
- rescue
149
- ensure
150
- EM.stop
120
+ def concat_content(string)
121
+ string.strip!
122
+ if string
123
+ @current_content << string
151
124
  end
152
125
  end
153
126
 
127
+ def end_document
128
+ puts @nodes.inspect
129
+ end
154
130
  end
155
131
  end
156
-
157
- #require 'stringio'
158
- #str = StringIO.new
159
- #z = Zlib::GzipWriter.new(str)
160
- #z.write(txt)
161
- #z.close