bliss 0.0.7 → 0.0.9

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,12 +1,217 @@
1
1
  module Bliss
2
2
  class Parser
3
- def initialize(path)
3
+ def initialize(path, filepath=nil)
4
4
  @path = path
5
- @parser_machine = Bliss::ParserMachine.new(path)
5
+
6
+ @parser_machine = Bliss::ParserMachine.new
7
+
8
+ @push_parser = Nokogiri::XML::SAX::PushParser.new(@parser_machine)
9
+
10
+ if filepath
11
+ @file = File.new(filepath, 'w')
12
+ @file.autoclose = false
13
+ end
14
+
15
+ @root = nil
16
+ @nodes = nil
17
+
18
+ on_root {}
19
+ end
20
+
21
+ def on_root(&block)
22
+ return false if not block.is_a? Proc
23
+ @parser_machine.on_root { |root|
24
+ @root = root
25
+ block.call(root)
26
+ }
27
+ end
28
+
29
+ def on_tag_open(element='default', &block)
30
+ return false if block.arity != 1
31
+
32
+ overriden_block = Proc.new { |depth|
33
+ if not element == 'default'
34
+ reset_unhandled_bytes
35
+ end
36
+ block.call(depth)
37
+ }
38
+ @parser_machine.on_tag_open(element, overriden_block)
39
+ end
40
+
41
+ def on_tag_close(element='default', &block)
42
+ overriden_block = Proc.new { |hash, depth|
43
+ #if not element == 'default'
44
+ reset_unhandled_bytes
45
+ #end
46
+ block.call(hash, depth)
47
+ }
48
+ @parser_machine.on_tag_close(element, overriden_block)
49
+ end
50
+
51
+ def on_max_unhandled_bytes(bytes, &block)
52
+ @max_unhandled_bytes = bytes
53
+ @on_max_unhandled_bytes = block
54
+ end
55
+
56
+ def wait_tag_close(element)
57
+ @wait_tag_close = "</#{element}>"
58
+ end
59
+
60
+ def reset_unhandled_bytes
61
+ return false if not check_unhandled_bytes?
62
+ @unhandled_bytes = 0
63
+ end
64
+
65
+ def check_unhandled_bytes
66
+ if @unhandled_bytes > @max_unhandled_bytes
67
+ if @on_max_unhandled_bytes
68
+ @on_max_unhandled_bytes.call
69
+ @on_max_unhandled_bytes = nil
70
+ end
71
+ #self.close
72
+ end
73
+ end
74
+
75
+ def exceeded?
76
+ return false if not check_unhandled_bytes?
77
+ if @unhandled_bytes > @max_unhandled_bytes
78
+ return true
79
+ end
80
+ end
81
+
82
+ def check_unhandled_bytes?
83
+ @max_unhandled_bytes ? true : false
84
+ end
85
+
86
+ def root
87
+ @root
88
+ end
89
+
90
+ def close
91
+ @parser_machine.close
6
92
  end
7
93
 
8
94
  def parse
9
- @parser_machine.parse
95
+ reset_unhandled_bytes if check_unhandled_bytes?
96
+
97
+ EM.run do
98
+ http = EM::HttpRequest.new(@path).get
99
+
100
+ @autodetect_compression = true
101
+ compression = :none
102
+ if @autodetect_compression
103
+ http.headers do
104
+ if (/^attachment.+filename.+\.gz/i === http.response_header['CONTENT_DISPOSITION']) or http.response_header.compressed? or ["application/octet-stream", "application/x-gzip"].include? http.response_header['CONTENT_TYPE']
105
+ @zstream = Zlib::Inflate.new(Zlib::MAX_WBITS+16)
106
+ compression = :gzip
107
+ end
108
+ end
109
+ end
110
+
111
+ http.stream { |chunk|
112
+ if chunk
113
+ chunk.force_encoding('UTF-8')
114
+
115
+ if check_unhandled_bytes?
116
+ @unhandled_bytes += chunk.length
117
+ check_unhandled_bytes
118
+ end
119
+ if not @parser_machine.is_closed?
120
+ begin
121
+ case compression
122
+ when :gzip
123
+ chunk = @zstream.inflate(chunk)
124
+ chunk.force_encoding('UTF-8')
125
+ end
126
+ @push_parser << chunk
127
+ if @file
128
+ @file << chunk
129
+ end
130
+ rescue Nokogiri::XML::SyntaxError => e
131
+ #puts 'encoding error'
132
+ if e.message.include?("encoding")
133
+ raise Bliss::EncodingError, "Wrong encoding given"
134
+ end
135
+ end
136
+
137
+ else
138
+ if exceeded?
139
+ #puts 'exceeded'
140
+ secure_close
141
+ else
142
+ if @file
143
+ if @wait_tag_close
144
+ #puts 'handle wait'
145
+ handle_wait_tag_close(chunk) #if @wait_tag_close
146
+ else
147
+ #puts 'secure close'
148
+ secure_close
149
+ end
150
+ end
151
+ end
152
+ end
153
+ end
154
+ }
155
+ http.errback {
156
+ #puts 'errback'
157
+ secure_close
158
+ }
159
+ http.callback {
160
+ #if @file
161
+ # @file.close
162
+ #end
163
+ #EM.stop
164
+ secure_close
165
+ }
166
+ end
167
+ file_close
10
168
  end
169
+
170
+ def autodetect_compression(http)
171
+ #compression = :none
172
+ puts compression
173
+ return compression
174
+ end
175
+
176
+ def handle_wait_tag_close(chunk)
177
+ begin
178
+ last_index = chunk.index(@wait_tag_close)
179
+ if last_index
180
+ last_index += 4
181
+ @file << chunk[0..last_index]
182
+ @file << "</#{self.root}>" # TODO set this by using actual depth, so all tags get closed
183
+ secure_close
184
+ else
185
+ @file << chunk
186
+ end
187
+ rescue
188
+ secure_close
189
+ end
190
+ end
191
+
192
+ def file_close
193
+ if @file
194
+ @file.close
195
+ end
196
+ end
197
+
198
+ def secure_close
199
+ begin
200
+ if @zstream
201
+ @zstream.close
202
+ end
203
+ rescue
204
+ ensure
205
+ EM.stop
206
+ #puts "Closed secure."
207
+ end
208
+ end
209
+
11
210
  end
12
211
  end
212
+
213
+ #require 'stringio'
214
+ #str = StringIO.new
215
+ #z = Zlib::GzipWriter.new(str)
216
+ #z.write(txt)
217
+ #z.close
@@ -1,161 +1,131 @@
1
1
  module Bliss
2
# SAX document handler used by the push parser.
#
# It records the root element, keeps the chain of currently open element names
# in @depth, accumulates parsed data in the @nodes hash and dispatches the
# callbacks registered through #on_root, #on_tag_open and #on_tag_close.
# Once #close has been called every SAX event is ignored.
#
# NOTE(review): +pair_at_chain+ and +value_at_chain+ are called on a Hash but
# are not defined in this file; presumably they are Hash extensions provided
# elsewhere in the gem. Confirm their contract before changing the node logic.
class ParserMachine < Nokogiri::XML::SAX::Document
  def initialize
    @depth = []
    # @settings = {} # downcased

    @root = nil
    @nodes = {}
    @current_node = {}

    @on_root = nil

    @on_tag_open = {}
    @on_tag_close = {}

    @closed = false

    # Text buffer for the element being read; initialised here so that
    # #characters and #end_element never see nil.
    @current_content = ''.dup
  end

  # Stores the block called with the root element name.
  def on_root(&block)
    @on_root = block
  end

  # Registers +block+ (a callable taking the depth chain) for opening tags
  # named +element+; 'default' is the fallback key.
  def on_tag_open(element, block)
    @on_tag_open.merge!(element => block)
  end

  # Registers +block+ (a callable taking the node value and the depth chain)
  # for closing tags named +element+; 'default' is the fallback key.
  def on_tag_close(element, block)
    @on_tag_close.merge!(element => block)
  end

  # Stops all further event processing.
  def close
    @closed = true
  end

  # @return [Boolean] whether #close has been called
  def is_closed?
    @closed
  end

  # SAX callback for an opening tag.
  def start_element(element, attributes = [])
    return if is_closed?
    # element_transformation

    if @root.nil?
      @root = element
      @on_root.call(@root) if @on_root.is_a? Proc
    end

    # An element nested directly inside one with the same name is not pushed.
    @depth.push(element) if @depth.last != element

    handler = @on_tag_open.fetch(element) { @on_tag_open['default'] }
    handler.call(@depth) if handler

    current = @nodes.pair_at_chain(@depth)
    value_at = @nodes.value_at_chain(@depth)

    if current.is_a? Hash
      if value_at.nil?
        # First occurrence of this element under its parent.
        current[element] = {}
      elsif value_at.is_a? Hash
        # Repeated element: collect the siblings in an array.
        if current[element].is_a? Array
          current[element] << {}
        else
          current[element] = [current[element], {}]
        end
      end
    end

    @current_content = ''.dup
  end

  # SAX callback for text nodes.
  def characters(string)
    return if is_closed?

    concat_content(string)
  end

  # SAX callback for CDATA sections; treated like plain text.
  def cdata_block(string)
    return if is_closed?

    concat_content(string)
  end

  # SAX callback for a closing tag: stores the buffered text (when there is
  # any), runs the matching close handler and pops the depth chain.
  def end_element(element, attributes = [])
    return if is_closed?
    # element_transformation

    current = @nodes.pair_at_chain(@depth)
    value_at = @nodes.value_at_chain(@depth)

    if value_at.is_a? Hash
      current[element] = @current_content unless @current_content.empty?
    elsif value_at.nil?
      if current.is_a? Array
        current = current.last
        current[element] = @current_content unless @current_content.empty?
      end
    end
    @current_content = ''.dup

    # The handler receives the value looked up before the text was stored.
    handler = @on_tag_close.fetch(element) { @on_tag_close['default'] }
    handler.call(value_at, @depth) if handler

    @depth.pop if @depth.last == element
  end

  # Appends the stripped text to the current element's buffer. The argument is
  # left untouched, so frozen or shared strings are safe to pass in.
  #
  # Each piece is stripped on its own, so whitespace at the boundary between
  # two consecutive pieces is dropped.
  def concat_content(string)
    return if string.nil?

    stripped = string.strip
    @current_content << stripped unless stripped.empty?
  end

  # SAX callback fired at the end of the document.
  #
  # NOTE(review): this prints the whole parsed tree to stdout, which looks like
  # leftover debugging output; kept because it is observable behaviour.
  def end_document
    puts @nodes.inspect
  end
end
155
131
  end
156
-
157
- #require 'stringio'
158
- #str = StringIO.new
159
- #z = Zlib::GzipWriter.new(str)
160
- #z.write(txt)
161
- #z.close