pseudoxml 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG ADDED
File without changes
data/RAKEFILE ADDED
@@ -0,0 +1,66 @@
1
#!/usr/bin/env ruby

# Build script for the pseudoxml gem: generates RDoc, packages the gem
# (plus tar/zip archives) and publishes the generated docs to RubyForge.

require 'rubygems'
require 'rake'
require 'rake/rdoctask'
require 'rake/gempackagetask'
require 'rake/contrib/rubyforgepublisher'
require 'pscp'

PACKAGE_VERSION = '0.1.0'

# Files that are shipped in the gem and handed to RDoc.
PACKAGE_FILES = FileList[
  'README',
  'CHANGELOG',
  'RAKEFILE',
  'lib/**/*.rb'
].to_a

PROJECT = 'pseudoxml'

# Read back by the :rubyforge task below when publishing the docs.
ENV['RUBYFORGE_USER'] = "ssmoot@rubyforge.org"
ENV['RUBYFORGE_PROJECT'] = "/var/www/gforge-projects/#{PROJECT}"

task :default => [:rdoc]

desc 'Generate Documentation'
rd = Rake::RDocTask.new do |rdoc|
  rdoc.rdoc_dir = 'doc'
  rdoc.title = "PseudoXml -- A not-quite Xml parser in pure-ruby"
  rdoc.options << '--line-numbers' << '--inline-source' << '--main' << 'README'
  rdoc.rdoc_files.include(PACKAGE_FILES)
end

gem_spec = Gem::Specification.new do |s|
  s.platform = Gem::Platform::RUBY
  s.name = PROJECT
  s.summary = "PseudoXml -- A not-quite Xml parser in pure-ruby"
  s.description = "Fast, low memory requirements for the streaming version, but no CDATA or Attribute support"
  s.version = PACKAGE_VERSION

  s.authors = 'Sam Smoot', 'Scott Bauer'
  s.email = 'ssmoot@gmail.com; bauer.mail@gmail.com'
  s.rubyforge_project = PROJECT
  s.homepage = 'http://substantiality.net'

  s.files = PACKAGE_FILES

  s.require_path = 'lib'
  s.requirements << 'none'
  # Must name a file that exists under require_path; this gem's entry
  # point is lib/pseudo_xml.rb.
  s.autorequire = 'pseudo_xml'

  s.has_rdoc = true
  s.rdoc_options << '--line-numbers' << '--inline-source' << '--main' << 'README'
  # Everything handed to RDoc that is not Ruby source (README, CHANGELOG, ...).
  s.extra_rdoc_files = rd.rdoc_files.reject { |fn| fn =~ /\.rb$/ }.to_a
end

Rake::GemPackageTask.new(gem_spec) do |p|
  p.gem_spec = gem_spec
  p.need_tar = true
  p.need_zip = true
end

desc "Publish RDOC to RubyForge"
task :rubyforge => [:rdoc, :gem] do
  Rake::SshDirPublisher.new(ENV['RUBYFORGE_USER'], ENV['RUBYFORGE_PROJECT'], 'doc').upload
end
data/README ADDED
File without changes
data/lib/pseudo_xml.rb ADDED
@@ -0,0 +1,5 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'pseudo_xml/document'
4
+ require 'pseudo_xml/simple_query'
5
+ require 'pseudo_xml/xpath_query'
@@ -0,0 +1,30 @@
1
module PseudoXml

  # Receives parser callbacks (start_element, end_element, add_text) and
  # assembles them into a tree of Element objects. Elements that are still
  # open are kept on a stack, innermost last.
  class Collector

    def initialize
      @parsed_elements = []
    end

    # Opens a new element. Its name is the first run of word characters
    # found in +element_text+ (the raw tag text).
    def start_element(element_text)
      tag_name = element_text[/\w+/]
      @parsed_elements << Element.new(tag_name)
    end

    # Closes the innermost open element and appends it to its parent's
    # children. The outermost element has no parent, so it is left on the
    # stack.
    def end_element
      return unless @parsed_elements.size > 1

      closed = @parsed_elements.pop
      @parsed_elements.last.elements << closed
    end

    # Attaches +text+ to the innermost open element. Empty text, and text
    # seen while no element is open, is ignored.
    def add_text(text)
      return if text.size.zero? || @parsed_elements.empty?

      @parsed_elements.last.text_nodes << text
    end

    # The stack of collected elements.
    def nodes
      @parsed_elements
    end
  end
end
@@ -0,0 +1,26 @@
1
+ require 'pseudo_xml/xml_string'
2
+ require 'pseudo_xml/collector'
3
+ require 'pseudo_xml/element'
4
+
5
module PseudoXml
  # Parses a complete XML string up front and exposes the resulting tree.
  class Document

    # The first node produced by the collector, or nil when parsing
    # produced no elements.
    attr_reader :root

    # Runs +xml+ through XmlString with a fresh Collector and remembers
    # the first collected node as the root.
    def initialize(xml)
      collector = Collector.new
      XmlString.new(xml).parse(collector)

      @root = collector.nodes.first
    end

    # Whatever the root reports from #count, or 0 when there is no root.
    def count_elements
      @root ? @root.count : 0
    end
  end
end
@@ -0,0 +1,37 @@
1
module PseudoXml

  # A node of the parsed tree: a name, the text fragments found directly
  # inside it, and its child elements. Enumerable methods iterate over the
  # direct children.
  class Element

    include Enumerable

    attr_reader :name

    # +name+ is stored with surrounding whitespace removed.
    def initialize(name)
      @name = name.strip
    end

    # Text fragments directly inside this element (created on first use).
    def text_nodes
      @text_nodes ||= []
    end

    # All text fragments joined with the current record separator ($/).
    def text
      text_nodes.join($/)
    end

    # Direct child elements (created on first use).
    def elements
      @elements ||= []
    end

    # Yields each direct child element.
    def each
      elements.each { |node| yield node }
    end

    def to_s
      @name
    end

    # With no argument and no block: the number of elements in this
    # subtree, counting this element itself.
    #
    # With an argument or a block the call is passed on to
    # Enumerable#count, which counts matching direct children. Without
    # this, overriding #count would make those standard forms raise
    # ArgumentError.
    def count(*args, &block)
      return super unless args.empty? && block.nil?

      elements.inject(1) { |sum, node| sum + node.count }
    end
  end
end
@@ -0,0 +1,39 @@
1
+ require 'pseudo_xml/xml_string'
2
+ require 'pseudo_xml/collector'
3
+
4
module PseudoXml
  # Streaming query: parses the document and hands every element whose
  # name equals the requested one (together with everything nested inside
  # it) to a block, one at a time, instead of building the whole tree.
  class SimpleQuery < Collector

    # Parses +xml+ and calls the block with each element named
    # +element_name+ as soon as its closing tag has been seen.
    # Returns whatever the parser's #parse returns.
    def self.find(xml, element_name, &b)
      parser = XmlString.new(xml)
      collector = new(element_name, &b)

      parser.parse(collector)
    end

    # +b+ is called with each completed match; it is required as soon as
    # the document contains a match.
    def initialize(element_name, &b)
      @parsed_elements = []
      @find_element_name = element_name
      @block = b
    end

    # Starts collecting when the tag name matches. While a match is open
    # (the stack is non-empty) every nested element is collected too, so
    # the stack stays balanced with the end tags that follow.
    def start_element(element_text)
      element_name = element_text[/\w+/]

      if !@parsed_elements.empty? || element_name == @find_element_name
        @parsed_elements.push Element.new(element_name)
      end
    end

    # Nested elements are attached to their parent. When the outermost
    # collected element closes it is removed from the stack and passed to
    # the block. End tags seen outside of a match are ignored.
    def end_element
      if @parsed_elements.size > 1
        node = @parsed_elements.pop
        @parsed_elements.last.elements << node
      elsif @parsed_elements.size == 1
        @block.call(@parsed_elements.pop)
      end
    end
  end
end
@@ -0,0 +1,51 @@
1
module PseudoXml

  # Minimal, non-validating tokenizer over an XML string. It walks the
  # string tag by tag and reports what it finds to a collector object
  # that responds to start_element(tag_text), end_element and
  # add_text(text).
  class XmlString
    # An XML declaration / processing instruction at the very start of
    # the string, plus the line breaks that follow it. Anchored with \A
    # because the match length is used as an offset from the start of the
    # string.
    PROLOG = /\A(<\?.*?\?>(\r?\n)*)/

    def initialize(xml)
      @xml = xml
      reset!
    end

    # Rewinds the tokenizer to just after the prolog (if any).
    def reset!
      @offset_start = @offset_end = @text_start = skip_prolog
    end

    # Length of the leading prolog, or 0 when the string has none.
    def skip_prolog
      match = PROLOG.match(@xml)
      match ? match[1].size : 0
    end

    # Feeds +collector+ with the remaining content of the string:
    #
    # * text between tags            -> add_text(text)
    # * <name ...>                   -> start_element(tag_text)
    # * </name>                      -> end_element
    # * <name ... />                 -> start_element(tag_text), end_element
    # * <!-- ... -->, <!...>, <?...?> -> skipped (they are not elements)
    #
    # Always returns nil. Stops at the end of the string or at the first
    # tag that is never terminated; call reset! to parse again.
    def parse(collector)
      until eof?
        @offset_start = @xml.index('<', @text_start)

        return nil unless @offset_start

        if @offset_start > @text_start
          collector.add_text @xml[@text_start...@offset_start]
        end

        @offset_end = tag_end(@offset_start)

        return nil unless @offset_end

        @text_start = @offset_end + 1
        dispatch(@xml[@offset_start..@offset_end], collector)
      end
    end

    def eof?
      @offset_start.nil? || @offset_end.nil?
    end

    private

    # Index of the last character of the markup that starts at +from+,
    # or nil when it is not terminated. Comments may contain '>' so they
    # are closed by '-->' rather than by the first '>'.
    # NOTE(review): other '<!' constructs that can contain '>' (CDATA
    # sections, DOCTYPE internal subsets) are still cut at the first '>'.
    def tag_end(from)
      if @xml[from, 4] == '<!--'
        close = @xml.index('-->', from + 4)
        close && close + 2
      else
        @xml.index('>', from)
      end
    end

    # Translates one piece of markup into collector callbacks.
    def dispatch(tag, collector)
      case tag[1, 1]
      when '/'
        collector.end_element
      when '!', '?'
        # comment, declaration or processing instruction: nothing to report
      else
        collector.start_element tag
        # A self-closing tag has no separate end tag, so close it now.
        collector.end_element if tag[-2, 1] == '/'
      end
    end

  end

end
@@ -0,0 +1,55 @@
1
+ require 'pseudo_xml/xml_string'
2
+ require 'pseudo_xml/collector'
3
+ require 'pseudo_xml/simple_query'
4
+
5
module PseudoXml
  # Streaming query: parses the document and hands every element found at
  # a given absolute path (together with everything nested inside it) to a
  # block, one at a time. The path is a '/'-separated list of element
  # names; a '*' step stands for any name.
  class XPathQuery < Collector

    # Parses +xml+ and calls the block with each element located at
    # +xpath+ as soon as its closing tag has been seen.
    # Returns whatever the parser's #parse returns.
    def self.find(xml, xpath, &b)
      parser = XmlString.new(xml)
      collector = new(xpath, &b)

      parser.parse(collector)
    end

    # +b+ is called with each completed match; it is required as soon as
    # the document contains a match.
    def initialize(xpath, &b)
      @parsed_elements = []
      @current_path = []
      @xpath = xpath.split('/')
      # A leading '/' yields an empty first step; drop it.
      @xpath.shift if @xpath.first == ''
      @block = b
    end

    # Tracks the path of open elements and starts collecting when it
    # reaches the requested path. While a match is open every nested
    # element is collected as well.
    def start_element(element_text)
      element_name = element_text[/\w+/]

      @current_path.push element_name

      if !@parsed_elements.empty? || xpath_match?
        @parsed_elements.push Element.new(element_name)
      end
    end

    # True when the open-element path is at least as deep as the query
    # and each query step equals the path component at the same depth
    # (or is '*'). The depth check keeps a '*' step from matching a
    # component that does not exist yet.
    def xpath_match?
      return false if @current_path.size < @xpath.size

      @xpath.zip(@current_path).all? do |step, segment|
        step == '*' || step == segment
      end
    end

    # Nested elements are attached to their parent. When the outermost
    # collected element closes it is removed from the stack and passed to
    # the block.
    def end_element
      @current_path.pop

      if @parsed_elements.size > 1
        node = @parsed_elements.pop
        @parsed_elements.last.elements << node
      elsif @parsed_elements.size == 1
        @block.call(@parsed_elements.pop)
      end
    end
  end
end
metadata ADDED
@@ -0,0 +1,56 @@
1
+ --- !ruby/object:Gem::Specification
2
+ rubygems_version: 0.8.10
3
+ specification_version: 1
4
+ name: pseudoxml
5
+ version: !ruby/object:Gem::Version
6
+ version: 0.1.0
7
+ date: 2006-02-01
8
+ summary: "PseudoXml -- A not-quite Xml parser in pure-ruby"
9
+ require_paths:
10
+ - lib
11
+ email: ssmoot@gmail.com; bauer.mail@gmail.com
12
+ homepage: http://substantiality.net
13
+ rubyforge_project: pseudoxml
14
+ description: "Fast, low memory requirements for the streaming version, but no CDATA or
15
+ Attribute support"
16
+ autorequire: rexchange
17
+ default_executable:
18
+ bindir: bin
19
+ has_rdoc: true
20
+ required_ruby_version: !ruby/object:Gem::Version::Requirement
21
+ requirements:
22
+ -
23
+ - ">"
24
+ - !ruby/object:Gem::Version
25
+ version: 0.0.0
26
+ version:
27
+ platform: ruby
28
+ authors:
29
+ - Sam Smoot
30
+ - Scott Bauer
31
+ files:
32
+ - README
33
+ - CHANGELOG
34
+ - RAKEFILE
35
+ - lib/pseudo_xml.rb
36
+ - lib/pseudo_xml/collector.rb
37
+ - lib/pseudo_xml/document.rb
38
+ - lib/pseudo_xml/element.rb
39
+ - lib/pseudo_xml/simple_query.rb
40
+ - lib/pseudo_xml/xml_string.rb
41
+ - lib/pseudo_xml/xpath_query.rb
42
+ test_files: []
43
+ rdoc_options:
44
+ - "--line-numbers"
45
+ - "--inline-source"
46
+ - "--main"
47
+ - README
48
+ extra_rdoc_files:
49
+ - README
50
+ - CHANGELOG
51
+ - RAKEFILE
52
+ executables: []
53
+ extensions: []
54
+ requirements:
55
+ - none
56
+ dependencies: []