bliss 0.0.5 → 0.0.6

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile CHANGED
@@ -3,9 +3,9 @@ source "http://rubygems.org"
3
3
  # Example:
4
4
  # gem "activesupport", ">= 2.3.5"
5
5
 
6
- gem "nokogiri"
7
- gem "eventmachine", ">= 0.12"
8
- gem "em-http-request"
6
+ gem "nokogiri", ">= 1.5.2"
7
+ gem "eventmachine", ">= 1.0.0.beta.4"
8
+ gem "em-http-request", ">= 1.0.2"
9
9
 
10
10
  # Add dependencies to develop your gem here.
11
11
  # Include everything needed to run rake, tests, features, etc.
data/Gemfile.lock CHANGED
@@ -3,7 +3,7 @@ GEM
3
3
  specs:
4
4
  addressable (2.2.7)
5
5
  cookiejar (0.3.0)
6
- em-http-request (1.0.1)
6
+ em-http-request (1.0.2)
7
7
  addressable (>= 2.2.3)
8
8
  cookiejar
9
9
  em-socksify
@@ -19,10 +19,14 @@ GEM
19
19
  git (>= 1.2.5)
20
20
  rake
21
21
  multi_json (1.1.0)
22
- nokogiri (1.5.0)
22
+ nokogiri (1.5.2)
23
23
  rake (0.9.2.2)
24
- shoulda (2.11.3)
25
- simplecov (0.6.0)
24
+ shoulda (3.0.1)
25
+ shoulda-context (~> 1.0.0)
26
+ shoulda-matchers (~> 1.0.0)
27
+ shoulda-context (1.0.0)
28
+ shoulda-matchers (1.0.0)
29
+ simplecov (0.6.1)
26
30
  multi_json (~> 1.0)
27
31
  simplecov-html (~> 0.5.3)
28
32
  simplecov-html (0.5.3)
@@ -32,9 +36,9 @@ PLATFORMS
32
36
 
33
37
  DEPENDENCIES
34
38
  bundler (~> 1.0.0)
35
- em-http-request
36
- eventmachine (>= 0.12)
39
+ em-http-request (>= 1.0.2)
40
+ eventmachine (>= 1.0.0.beta.4)
37
41
  jeweler (~> 1.6.4)
38
- nokogiri
42
+ nokogiri (>= 1.5.2)
39
43
  shoulda
40
44
  simplecov
data/Rakefile CHANGED
@@ -21,6 +21,7 @@ Jeweler::Tasks.new do |gem|
21
21
  gem.description = %Q{streamed xml parsing tool}
22
22
  gem.email = "krakatoa1987@gmail.com"
23
23
  gem.authors = ["Fernando Alonso"]
24
+ gem.require_paths = ["lib"]
24
25
  # dependencies defined in Gemfile
25
26
  end
26
27
  Jeweler::RubygemsDotOrgTasks.new
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.5
1
+ 0.0.6
data/bliss.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "bliss"
8
- s.version = "0.0.5"
8
+ s.version = "0.0.6"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Fernando Alonso"]
12
- s.date = "2012-03-01"
12
+ s.date = "2012-03-21"
13
13
  s.description = "streamed xml parsing tool"
14
14
  s.email = "krakatoa1987@gmail.com"
15
15
  s.extra_rdoc_files = [
@@ -46,26 +46,26 @@ Gem::Specification.new do |s|
46
46
  s.specification_version = 3
47
47
 
48
48
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
49
- s.add_runtime_dependency(%q<nokogiri>, [">= 0"])
50
- s.add_runtime_dependency(%q<eventmachine>, [">= 0.12"])
51
- s.add_runtime_dependency(%q<em-http-request>, [">= 0"])
49
+ s.add_runtime_dependency(%q<nokogiri>, [">= 1.5.2"])
50
+ s.add_runtime_dependency(%q<eventmachine>, [">= 1.0.0.beta.4"])
51
+ s.add_runtime_dependency(%q<em-http-request>, [">= 1.0.2"])
52
52
  s.add_development_dependency(%q<shoulda>, [">= 0"])
53
53
  s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
54
54
  s.add_development_dependency(%q<jeweler>, ["~> 1.6.4"])
55
55
  s.add_development_dependency(%q<simplecov>, [">= 0"])
56
56
  else
57
- s.add_dependency(%q<nokogiri>, [">= 0"])
58
- s.add_dependency(%q<eventmachine>, [">= 0.12"])
59
- s.add_dependency(%q<em-http-request>, [">= 0"])
57
+ s.add_dependency(%q<nokogiri>, [">= 1.5.2"])
58
+ s.add_dependency(%q<eventmachine>, [">= 1.0.0.beta.4"])
59
+ s.add_dependency(%q<em-http-request>, [">= 1.0.2"])
60
60
  s.add_dependency(%q<shoulda>, [">= 0"])
61
61
  s.add_dependency(%q<bundler>, ["~> 1.0.0"])
62
62
  s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
63
63
  s.add_dependency(%q<simplecov>, [">= 0"])
64
64
  end
65
65
  else
66
- s.add_dependency(%q<nokogiri>, [">= 0"])
67
- s.add_dependency(%q<eventmachine>, [">= 0.12"])
68
- s.add_dependency(%q<em-http-request>, [">= 0"])
66
+ s.add_dependency(%q<nokogiri>, [">= 1.5.2"])
67
+ s.add_dependency(%q<eventmachine>, [">= 1.0.0.beta.4"])
68
+ s.add_dependency(%q<em-http-request>, [">= 1.0.2"])
69
69
  s.add_dependency(%q<shoulda>, [">= 0"])
70
70
  s.add_dependency(%q<bundler>, ["~> 1.0.0"])
71
71
  s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
@@ -1,5 +1,7 @@
1
1
  module Bliss
2
2
  class ParserMachine
3
+ attr_writer :max_unhandled_bytes
4
+
3
5
  def initialize(path, filepath=nil)
4
6
  @path = path
5
7
 
@@ -27,11 +29,46 @@ module Bliss
27
29
 
28
30
  def on_tag_open(element, &block)
29
31
  return false if block.arity != 1
30
- @sax_parser.on_tag_open(element, block)
32
+
33
+ overriden_block = Proc.new { |depth|
34
+ reset_unhandled_bytes
35
+ block.call(depth)
36
+ }
37
+ @sax_parser.on_tag_open(element, overriden_block)
31
38
  end
32
39
 
33
40
  def on_tag_close(element, &block)
34
- @sax_parser.on_tag_close(element, block)
41
+ overriden_block = Proc.new { |hash|
42
+ reset_unhandled_bytes
43
+ block.call(hash)
44
+ }
45
+ @sax_parser.on_tag_close(element, overriden_block)
46
+ end
47
+
48
+ def wait_tag_close(element)
49
+ @wait_tag_close = "</#{element}>"
50
+ end
51
+
52
+ def reset_unhandled_bytes
53
+ return false if not check_unhandled_bytes?
54
+ @unhandled_bytes = 0
55
+ end
56
+
57
+ def check_unhandled_bytes
58
+ if @unhandled_bytes > @max_unhandled_bytes
59
+ self.close
60
+ end
61
+ end
62
+
63
+ def exceeded?
64
+ return false if not check_unhandled_bytes?
65
+ if @unhandled_bytes > @max_unhandled_bytes
66
+ return true
67
+ end
68
+ end
69
+
70
+ def check_unhandled_bytes?
71
+ @max_unhandled_bytes ? true : false
35
72
  end
36
73
 
37
74
  def root
@@ -43,33 +80,39 @@ module Bliss
43
80
  end
44
81
 
45
82
  def parse
46
- @bytes = 0
83
+ reset_unhandled_bytes if check_unhandled_bytes?
47
84
 
48
85
  EM.run do
49
86
  http = EM::HttpRequest.new(@path).get
50
87
  http.stream { |chunk|
51
- chunk.force_encoding('UTF-8')
88
+ if chunk
89
+ chunk.force_encoding('UTF-8')
52
90
 
53
- @parser << chunk
91
+ @parser << chunk
54
92
 
55
- @bytes += chunk.length
56
-
57
- if not @sax_parser.is_closed?
58
- if @file
59
- @file << chunk
93
+ if check_unhandled_bytes?
94
+ @unhandled_bytes += chunk.length
95
+ check_unhandled_bytes
60
96
  end
61
- else
62
- if @file
63
- last_index = chunk.index('</ad>') + 4
64
- begin
65
- @file << chunk[0..last_index]
66
- @file << "</#{self.root}>"
67
- ensure
68
- @file.close
97
+
98
+ if not @sax_parser.is_closed?
99
+ if @file
100
+ @file << chunk
101
+ end
102
+ else
103
+ if @file and not exceeded? and @wait_tag_close
104
+ handle_wait_tag_close(chunk) #if @wait_tag_close
105
+ else
106
+ begin
107
+ if @file
108
+ @file.close
109
+ end
110
+ ensure
111
+ EM.stop
112
+ end
69
113
  end
70
- end
71
114
 
72
- EM.stop
115
+ end
73
116
  end
74
117
  }
75
118
  http.callback {
@@ -80,6 +123,29 @@ module Bliss
80
123
  }
81
124
  end
82
125
  end
126
+
127
+ def handle_wait_tag_close(chunk)
128
+ begin
129
+ last_index = chunk.index(@wait_tag_close)
130
+ if last_index
131
+ last_index += 4
132
+ @file << chunk[0..last_index]
133
+ @file << "</#{self.root}>" # TODO set this by using actual depth, so all tags get closed
134
+ @file.close
135
+ EM.stop
136
+ else
137
+ @file << chunk
138
+ end
139
+ rescue
140
+ begin
141
+ @file.close
142
+ rescue
143
+ ensure
144
+ EM.stop
145
+ end
146
+ end
147
+ end
148
+
83
149
  end
84
150
  end
85
151
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bliss
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,44 +9,44 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-03-01 00:00:00.000000000 Z
12
+ date: 2012-03-21 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: nokogiri
16
- requirement: &10621740 !ruby/object:Gem::Requirement
16
+ requirement: &16081520 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
20
20
  - !ruby/object:Gem::Version
21
- version: '0'
21
+ version: 1.5.2
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *10621740
24
+ version_requirements: *16081520
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: eventmachine
27
- requirement: &10620860 !ruby/object:Gem::Requirement
27
+ requirement: &16079400 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
31
31
  - !ruby/object:Gem::Version
32
- version: '0.12'
32
+ version: 1.0.0.beta.4
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *10620860
35
+ version_requirements: *16079400
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: em-http-request
38
- requirement: &10619800 !ruby/object:Gem::Requirement
38
+ requirement: &16076620 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
42
42
  - !ruby/object:Gem::Version
43
- version: '0'
43
+ version: 1.0.2
44
44
  type: :runtime
45
45
  prerelease: false
46
- version_requirements: *10619800
46
+ version_requirements: *16076620
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: shoulda
49
- requirement: &10617960 !ruby/object:Gem::Requirement
49
+ requirement: &16074940 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ! '>='
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: '0'
55
55
  type: :development
56
56
  prerelease: false
57
- version_requirements: *10617960
57
+ version_requirements: *16074940
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: bundler
60
- requirement: &10616320 !ruby/object:Gem::Requirement
60
+ requirement: &16088940 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ~>
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: 1.0.0
66
66
  type: :development
67
67
  prerelease: false
68
- version_requirements: *10616320
68
+ version_requirements: *16088940
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: jeweler
71
- requirement: &10615260 !ruby/object:Gem::Requirement
71
+ requirement: &16083840 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ~>
@@ -76,10 +76,10 @@ dependencies:
76
76
  version: 1.6.4
77
77
  type: :development
78
78
  prerelease: false
79
- version_requirements: *10615260
79
+ version_requirements: *16083840
80
80
  - !ruby/object:Gem::Dependency
81
81
  name: simplecov
82
- requirement: &10725500 !ruby/object:Gem::Requirement
82
+ requirement: &16094900 !ruby/object:Gem::Requirement
83
83
  none: false
84
84
  requirements:
85
85
  - - ! '>='
@@ -87,7 +87,7 @@ dependencies:
87
87
  version: '0'
88
88
  type: :development
89
89
  prerelease: false
90
- version_requirements: *10725500
90
+ version_requirements: *16094900
91
91
  description: streamed xml parsing tool
92
92
  email: krakatoa1987@gmail.com
93
93
  executables: []
@@ -129,7 +129,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
129
129
  version: '0'
130
130
  segments:
131
131
  - 0
132
- hash: -3470161500999733584
132
+ hash: -3872124772469338070
133
133
  required_rubygems_version: !ruby/object:Gem::Requirement
134
134
  none: false
135
135
  requirements: