bliss 0.0.5 → 0.0.6

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
data/Gemfile CHANGED
@@ -3,9 +3,9 @@ source "http://rubygems.org"
3
3
  # Example:
4
4
  # gem "activesupport", ">= 2.3.5"
5
5
 
6
- gem "nokogiri"
7
- gem "eventmachine", ">= 0.12"
8
- gem "em-http-request"
6
+ gem "nokogiri", ">= 1.5.2"
7
+ gem "eventmachine", ">= 1.0.0.beta.4"
8
+ gem "em-http-request", ">= 1.0.2"
9
9
 
10
10
  # Add dependencies to develop your gem here.
11
11
  # Include everything needed to run rake, tests, features, etc.
data/Gemfile.lock CHANGED
@@ -3,7 +3,7 @@ GEM
3
3
  specs:
4
4
  addressable (2.2.7)
5
5
  cookiejar (0.3.0)
6
- em-http-request (1.0.1)
6
+ em-http-request (1.0.2)
7
7
  addressable (>= 2.2.3)
8
8
  cookiejar
9
9
  em-socksify
@@ -19,10 +19,14 @@ GEM
19
19
  git (>= 1.2.5)
20
20
  rake
21
21
  multi_json (1.1.0)
22
- nokogiri (1.5.0)
22
+ nokogiri (1.5.2)
23
23
  rake (0.9.2.2)
24
- shoulda (2.11.3)
25
- simplecov (0.6.0)
24
+ shoulda (3.0.1)
25
+ shoulda-context (~> 1.0.0)
26
+ shoulda-matchers (~> 1.0.0)
27
+ shoulda-context (1.0.0)
28
+ shoulda-matchers (1.0.0)
29
+ simplecov (0.6.1)
26
30
  multi_json (~> 1.0)
27
31
  simplecov-html (~> 0.5.3)
28
32
  simplecov-html (0.5.3)
@@ -32,9 +36,9 @@ PLATFORMS
32
36
 
33
37
  DEPENDENCIES
34
38
  bundler (~> 1.0.0)
35
- em-http-request
36
- eventmachine (>= 0.12)
39
+ em-http-request (>= 1.0.2)
40
+ eventmachine (>= 1.0.0.beta.4)
37
41
  jeweler (~> 1.6.4)
38
- nokogiri
42
+ nokogiri (>= 1.5.2)
39
43
  shoulda
40
44
  simplecov
data/Rakefile CHANGED
@@ -21,6 +21,7 @@ Jeweler::Tasks.new do |gem|
21
21
  gem.description = %Q{streamed xml parsing tool}
22
22
  gem.email = "krakatoa1987@gmail.com"
23
23
  gem.authors = ["Fernando Alonso"]
24
+ gem.require_paths = ["lib"]
24
25
  # dependencies defined in Gemfile
25
26
  end
26
27
  Jeweler::RubygemsDotOrgTasks.new
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.5
1
+ 0.0.6
data/bliss.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "bliss"
8
- s.version = "0.0.5"
8
+ s.version = "0.0.6"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Fernando Alonso"]
12
- s.date = "2012-03-01"
12
+ s.date = "2012-03-21"
13
13
  s.description = "streamed xml parsing tool"
14
14
  s.email = "krakatoa1987@gmail.com"
15
15
  s.extra_rdoc_files = [
@@ -46,26 +46,26 @@ Gem::Specification.new do |s|
46
46
  s.specification_version = 3
47
47
 
48
48
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
49
- s.add_runtime_dependency(%q<nokogiri>, [">= 0"])
50
- s.add_runtime_dependency(%q<eventmachine>, [">= 0.12"])
51
- s.add_runtime_dependency(%q<em-http-request>, [">= 0"])
49
+ s.add_runtime_dependency(%q<nokogiri>, [">= 1.5.2"])
50
+ s.add_runtime_dependency(%q<eventmachine>, [">= 1.0.0.beta.4"])
51
+ s.add_runtime_dependency(%q<em-http-request>, [">= 1.0.2"])
52
52
  s.add_development_dependency(%q<shoulda>, [">= 0"])
53
53
  s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
54
54
  s.add_development_dependency(%q<jeweler>, ["~> 1.6.4"])
55
55
  s.add_development_dependency(%q<simplecov>, [">= 0"])
56
56
  else
57
- s.add_dependency(%q<nokogiri>, [">= 0"])
58
- s.add_dependency(%q<eventmachine>, [">= 0.12"])
59
- s.add_dependency(%q<em-http-request>, [">= 0"])
57
+ s.add_dependency(%q<nokogiri>, [">= 1.5.2"])
58
+ s.add_dependency(%q<eventmachine>, [">= 1.0.0.beta.4"])
59
+ s.add_dependency(%q<em-http-request>, [">= 1.0.2"])
60
60
  s.add_dependency(%q<shoulda>, [">= 0"])
61
61
  s.add_dependency(%q<bundler>, ["~> 1.0.0"])
62
62
  s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
63
63
  s.add_dependency(%q<simplecov>, [">= 0"])
64
64
  end
65
65
  else
66
- s.add_dependency(%q<nokogiri>, [">= 0"])
67
- s.add_dependency(%q<eventmachine>, [">= 0.12"])
68
- s.add_dependency(%q<em-http-request>, [">= 0"])
66
+ s.add_dependency(%q<nokogiri>, [">= 1.5.2"])
67
+ s.add_dependency(%q<eventmachine>, [">= 1.0.0.beta.4"])
68
+ s.add_dependency(%q<em-http-request>, [">= 1.0.2"])
69
69
  s.add_dependency(%q<shoulda>, [">= 0"])
70
70
  s.add_dependency(%q<bundler>, ["~> 1.0.0"])
71
71
  s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
@@ -1,5 +1,7 @@
1
1
  module Bliss
2
2
  class ParserMachine
3
+ attr_writer :max_unhandled_bytes
4
+
3
5
  def initialize(path, filepath=nil)
4
6
  @path = path
5
7
 
@@ -27,11 +29,46 @@ module Bliss
27
29
 
28
30
  def on_tag_open(element, &block)
29
31
  return false if block.arity != 1
30
- @sax_parser.on_tag_open(element, block)
32
+
33
+ overriden_block = Proc.new { |depth|
34
+ reset_unhandled_bytes
35
+ block.call(depth)
36
+ }
37
+ @sax_parser.on_tag_open(element, overriden_block)
31
38
  end
32
39
 
33
40
  def on_tag_close(element, &block)
34
- @sax_parser.on_tag_close(element, block)
41
+ overriden_block = Proc.new { |hash|
42
+ reset_unhandled_bytes
43
+ block.call(hash)
44
+ }
45
+ @sax_parser.on_tag_close(element, overriden_block)
46
+ end
47
+
48
+ def wait_tag_close(element)
49
+ @wait_tag_close = "</#{element}>"
50
+ end
51
+
52
+ def reset_unhandled_bytes
53
+ return false if not check_unhandled_bytes?
54
+ @unhandled_bytes = 0
55
+ end
56
+
57
+ def check_unhandled_bytes
58
+ if @unhandled_bytes > @max_unhandled_bytes
59
+ self.close
60
+ end
61
+ end
62
+
63
+ def exceeded?
64
+ return false if not check_unhandled_bytes?
65
+ if @unhandled_bytes > @max_unhandled_bytes
66
+ return true
67
+ end
68
+ end
69
+
70
+ def check_unhandled_bytes?
71
+ @max_unhandled_bytes ? true : false
35
72
  end
36
73
 
37
74
  def root
@@ -43,33 +80,39 @@ module Bliss
43
80
  end
44
81
 
45
82
  def parse
46
- @bytes = 0
83
+ reset_unhandled_bytes if check_unhandled_bytes?
47
84
 
48
85
  EM.run do
49
86
  http = EM::HttpRequest.new(@path).get
50
87
  http.stream { |chunk|
51
- chunk.force_encoding('UTF-8')
88
+ if chunk
89
+ chunk.force_encoding('UTF-8')
52
90
 
53
- @parser << chunk
91
+ @parser << chunk
54
92
 
55
- @bytes += chunk.length
56
-
57
- if not @sax_parser.is_closed?
58
- if @file
59
- @file << chunk
93
+ if check_unhandled_bytes?
94
+ @unhandled_bytes += chunk.length
95
+ check_unhandled_bytes
60
96
  end
61
- else
62
- if @file
63
- last_index = chunk.index('</ad>') + 4
64
- begin
65
- @file << chunk[0..last_index]
66
- @file << "</#{self.root}>"
67
- ensure
68
- @file.close
97
+
98
+ if not @sax_parser.is_closed?
99
+ if @file
100
+ @file << chunk
101
+ end
102
+ else
103
+ if @file and not exceeded? and @wait_tag_close
104
+ handle_wait_tag_close(chunk) #if @wait_tag_close
105
+ else
106
+ begin
107
+ if @file
108
+ @file.close
109
+ end
110
+ ensure
111
+ EM.stop
112
+ end
69
113
  end
70
- end
71
114
 
72
- EM.stop
115
+ end
73
116
  end
74
117
  }
75
118
  http.callback {
@@ -80,6 +123,29 @@ module Bliss
80
123
  }
81
124
  end
82
125
  end
126
+
127
+ def handle_wait_tag_close(chunk)
128
+ begin
129
+ last_index = chunk.index(@wait_tag_close)
130
+ if last_index
131
+ last_index += 4
132
+ @file << chunk[0..last_index]
133
+ @file << "</#{self.root}>" # TODO set this by using actual depth, so all tags get closed
134
+ @file.close
135
+ EM.stop
136
+ else
137
+ @file << chunk
138
+ end
139
+ rescue
140
+ begin
141
+ @file.close
142
+ rescue
143
+ ensure
144
+ EM.stop
145
+ end
146
+ end
147
+ end
148
+
83
149
  end
84
150
  end
85
151
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bliss
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,44 +9,44 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-03-01 00:00:00.000000000 Z
12
+ date: 2012-03-21 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: nokogiri
16
- requirement: &10621740 !ruby/object:Gem::Requirement
16
+ requirement: &16081520 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
20
20
  - !ruby/object:Gem::Version
21
- version: '0'
21
+ version: 1.5.2
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *10621740
24
+ version_requirements: *16081520
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: eventmachine
27
- requirement: &10620860 !ruby/object:Gem::Requirement
27
+ requirement: &16079400 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
31
31
  - !ruby/object:Gem::Version
32
- version: '0.12'
32
+ version: 1.0.0.beta.4
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *10620860
35
+ version_requirements: *16079400
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: em-http-request
38
- requirement: &10619800 !ruby/object:Gem::Requirement
38
+ requirement: &16076620 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
42
42
  - !ruby/object:Gem::Version
43
- version: '0'
43
+ version: 1.0.2
44
44
  type: :runtime
45
45
  prerelease: false
46
- version_requirements: *10619800
46
+ version_requirements: *16076620
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: shoulda
49
- requirement: &10617960 !ruby/object:Gem::Requirement
49
+ requirement: &16074940 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ! '>='
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: '0'
55
55
  type: :development
56
56
  prerelease: false
57
- version_requirements: *10617960
57
+ version_requirements: *16074940
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: bundler
60
- requirement: &10616320 !ruby/object:Gem::Requirement
60
+ requirement: &16088940 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ~>
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: 1.0.0
66
66
  type: :development
67
67
  prerelease: false
68
- version_requirements: *10616320
68
+ version_requirements: *16088940
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: jeweler
71
- requirement: &10615260 !ruby/object:Gem::Requirement
71
+ requirement: &16083840 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ~>
@@ -76,10 +76,10 @@ dependencies:
76
76
  version: 1.6.4
77
77
  type: :development
78
78
  prerelease: false
79
- version_requirements: *10615260
79
+ version_requirements: *16083840
80
80
  - !ruby/object:Gem::Dependency
81
81
  name: simplecov
82
- requirement: &10725500 !ruby/object:Gem::Requirement
82
+ requirement: &16094900 !ruby/object:Gem::Requirement
83
83
  none: false
84
84
  requirements:
85
85
  - - ! '>='
@@ -87,7 +87,7 @@ dependencies:
87
87
  version: '0'
88
88
  type: :development
89
89
  prerelease: false
90
- version_requirements: *10725500
90
+ version_requirements: *16094900
91
91
  description: streamed xml parsing tool
92
92
  email: krakatoa1987@gmail.com
93
93
  executables: []
@@ -129,7 +129,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
129
129
  version: '0'
130
130
  segments:
131
131
  - 0
132
- hash: -3470161500999733584
132
+ hash: -3872124772469338070
133
133
  required_rubygems_version: !ruby/object:Gem::Requirement
134
134
  none: false
135
135
  requirements: