pseudoxml 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGELOG ADDED
File without changes
data/RAKEFILE ADDED
@@ -0,0 +1,66 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rubygems'
4
+ require 'rake'
5
+ require 'rake/rdoctask'
6
+ require 'rake/gempackagetask'
7
+ require 'rake/contrib/rubyforgepublisher'
8
+ require 'pscp'
9
+
10
# Version number stamped onto the gem specification below.
PACKAGE_VERSION = '0.1.0'

# Files shipped in the gem; the same list is fed to RDoc.
PACKAGE_FILES = FileList[
  'README',
  'CHANGELOG',
  'RAKEFILE',
  'lib/**/*.rb'
].to_a

# Gem name; also used as the RubyForge project name and remote directory.
PROJECT = 'pseudoxml'

# Credentials/target consumed by the :rubyforge task at the bottom of the file.
ENV['RUBYFORGE_USER'] = "ssmoot@rubyforge.org"
ENV['RUBYFORGE_PROJECT'] = "/var/www/gforge-projects/#{PROJECT}"

task :default => [:rdoc]

desc 'Generate Documentation'
rd = Rake::RDocTask.new do |rdoc|
  rdoc.rdoc_dir = 'doc'
  rdoc.title = "PseudoXml -- A not-quite Xml parser in pure-ruby"
  rdoc.options << '--line-numbers' << '--inline-source' << '--main' << 'README'
  rdoc.rdoc_files.include(PACKAGE_FILES)
end

gem_spec = Gem::Specification.new do |s|
  s.platform = Gem::Platform::RUBY
  s.name = PROJECT
  s.summary = "PseudoXml -- A not-quite Xml parser in pure-ruby"
  s.description = "Fast, low memory requirements for the streaming version, but no CDATA or Attribute support"
  s.version = PACKAGE_VERSION

  s.authors = 'Sam Smoot', 'Scott Bauer'
  s.email = 'ssmoot@gmail.com; bauer.mail@gmail.com'
  s.rubyforge_project = PROJECT
  s.homepage = 'http://substantiality.net'

  s.files = PACKAGE_FILES

  s.require_path = 'lib'
  s.requirements << 'none'
  # Must name a file under lib/ that this gem actually ships. The previous
  # value ('rexchange') pointed at a library from a different project.
  s.autorequire = 'pseudo_xml'

  s.has_rdoc = true
  s.rdoc_options << '--line-numbers' << '--inline-source' << '--main' << 'README'
  # Every packaged file that is not Ruby source is listed as an extra RDoc page.
  s.extra_rdoc_files = rd.rdoc_files.reject { |fn| fn =~ /\.rb$/ }.to_a
end

Rake::GemPackageTask.new(gem_spec) do |p|
  p.gem_spec = gem_spec
  p.need_tar = true
  p.need_zip = true
end

desc "Publish RDOC to RubyForge"
task :rubyforge => [:rdoc, :gem] do
  Rake::SshDirPublisher.new(ENV['RUBYFORGE_USER'], ENV['RUBYFORGE_PROJECT'], 'doc').upload
end
data/README ADDED
File without changes
data/lib/pseudo_xml.rb ADDED
@@ -0,0 +1,5 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'pseudo_xml/document'
4
+ require 'pseudo_xml/simple_query'
5
+ require 'pseudo_xml/xpath_query'
@@ -0,0 +1,30 @@
1
module PseudoXml

  # Receives start/end/text events from a parser and assembles them into a
  # tree of Element objects. The stack of currently open elements lives in
  # @parsed_elements; its bottom entry is the outermost element seen so far.
  class Collector

    # Characters accepted in a tag name. Besides word characters this includes
    # '-', ':' and '.', so names such as "feed-list" or "ns:item" are kept
    # whole instead of being cut at the first such character.
    ELEMENT_NAME = /[\w:.\-]+/

    def initialize
      @parsed_elements = []
    end

    # Opens a new element. +element_text+ is the raw tag text (for example
    # "<item id='1'>"); only the first name-like run of characters is used.
    # A tag without any name yields an element with an empty name rather than
    # passing nil on to Element.new.
    def start_element(element_text)
      @parsed_elements.push Element.new(element_text[ELEMENT_NAME].to_s)
    end

    # Closes the innermost open element and attaches it to its parent.
    # The outermost element is never popped, so it stays available via #nodes.
    def end_element
      return unless @parsed_elements.size > 1

      node = @parsed_elements.pop
      @parsed_elements.last.elements << node
    end

    # Appends +text+ to the innermost open element. Empty strings, and text
    # seen while no element is open, are ignored.
    def add_text(text)
      return if text.empty? || @parsed_elements.empty?

      @parsed_elements.last.text_nodes << text
    end

    # Returns the stack of elements that are still open.
    def nodes
      @parsed_elements
    end
  end
end
@@ -0,0 +1,26 @@
1
+ require 'pseudo_xml/xml_string'
2
+ require 'pseudo_xml/collector'
3
+ require 'pseudo_xml/element'
4
+
5
module PseudoXml
  # Parses a whole XML string up front and exposes the resulting element tree.
  class Document

    # The first element gathered by the collector, or nil when the input
    # contained no elements.
    attr_reader :root

    # Runs +xml+ through XmlString with a fresh Collector and keeps the first
    # collected node as the document root.
    def initialize(xml)
      collector = Collector.new
      XmlString.new(xml).parse(collector)

      @root = collector.nodes.first
    end

    # Returns the root's own +count+, or 0 when there is no root.
    def count_elements
      @root ? @root.count : 0
    end
  end
end
@@ -0,0 +1,37 @@
1
module PseudoXml

  # A single node of the parsed tree: a name, its text fragments and its
  # child elements. Enumerable methods iterate over the direct children.
  class Element

    include Enumerable

    attr_reader :name

    # +name+ is stored with surrounding whitespace removed. nil is treated as
    # an empty name instead of raising.
    def initialize(name)
      @name = name.to_s.strip
    end

    # Text fragments that belong directly to this element (lazily created).
    def text_nodes
      @text_nodes ||= []
    end

    # All text fragments joined with the input record separator ($/).
    def text
      text_nodes.join($/)
    end

    # Direct child elements (lazily created).
    def elements
      @elements ||= []
    end

    # Yields each direct child. Without a block an Enumerator is returned,
    # instead of failing on the first yield.
    def each
      return enum_for(:each) unless block_given?

      elements.each { |node| yield node }
    end

    def to_s
      @name
    end

    # With no argument and no block: the number of elements in this subtree,
    # this element included. With an argument or a block the call is passed
    # to Enumerable#count, which this method would otherwise shadow.
    def count(*args, &block)
      return super unless args.empty? && block.nil?

      elements.inject(1) { |sum, node| sum + node.count }
    end
  end
end
@@ -0,0 +1,39 @@
1
+ require 'pseudo_xml/xml_string'
2
+ require 'pseudo_xml/collector'
3
+
4
module PseudoXml
  # Streaming search for every element with a given tag name. Each match is
  # built together with its descendants and handed to a block as soon as its
  # closing tag is seen, so the full tree is never held at once.
  #
  # Note: the earlier `protected :initialize` line was dropped. It ran before
  # this class defined #initialize, and Ruby makes #initialize private anyway,
  # so it had no effect.
  class SimpleQuery < Collector

    # Characters accepted in a tag name ('-', ':' and '.' included so that
    # names like "feed-list" are compared whole).
    ELEMENT_NAME = /[\w:.\-]+/

    # Scans +xml+ for elements named +element_name+.
    #
    # With a block, each match is yielded and nil is returned.
    # Without a block, the matches are returned as an Array.
    def self.find(xml, element_name, &b)
      matches = []
      handler = b || proc { |element| matches << element }

      XmlString.new(xml).parse(new(element_name, &handler))

      b ? nil : matches
    end

    def initialize(element_name, &b)
      @parsed_elements = []
      @find_element_name = element_name
      @block = b
    end

    # Starts collecting when the tag name matches; while a match is open
    # every nested tag is collected too. (The original tested `size > 1`,
    # which skipped the direct children of a match and made the match fire
    # on the first child's closing tag.)
    def start_element(element_text)
      element_name = element_text[ELEMENT_NAME].to_s

      if !@parsed_elements.empty? || element_name == @find_element_name
        @parsed_elements.push Element.new(element_name)
      end
    end

    # Closes the innermost collected element: nested ones are attached to
    # their parent, a finished top-level match is passed to the block.
    # Closing tags seen outside any match are ignored.
    def end_element
      if @parsed_elements.size > 1
        node = @parsed_elements.pop
        @parsed_elements.last.elements << node
      elsif @parsed_elements.size == 1
        match = @parsed_elements.pop
        @block.call(match) if @block
      end
    end
  end
end
@@ -0,0 +1,51 @@
1
module PseudoXml

  # Scans an XML string tag by tag and reports what it finds to a collector.
  #
  # The collector is any object responding to +start_element(tag_text)+,
  # +end_element+ and +add_text(text)+. Tag text is passed on raw (attributes
  # are not interpreted) and CDATA sections are not understood.
  class XmlString
    def initialize(xml)
      @xml = xml
      reset!
    end

    # Rewinds the scanner to the first character after the prolog.
    def reset!
      @offset_start = @offset_end = @text_start = skip_prolog
    end

    # Length of a leading <?...?> declaration plus the newlines after it, or 0
    # when the string does not begin with one. Anchored with \A so that a
    # processing instruction at the start of a later line is not mistaken for
    # the prolog; /m lets the declaration span several lines.
    def skip_prolog
      prolog = @xml[/\A<\?.*?\?>(?:\r?\n)*/m]
      prolog ? prolog.size : 0
    end

    # Walks the string and calls the collector for every text run, opening
    # tag and closing tag. Always returns nil.
    #
    # * A self-closing tag ("<br/>") produces start_element followed
    #   immediately by end_element, keeping the collector's nesting balanced.
    # * Comments, declarations ("<!...>") and processing instructions
    #   ("<?...?>") are skipped instead of being reported as elements.
    # * Scanning stops quietly at the first unterminated tag.
    def parse(collector)
      until eof?
        @offset_start = @xml.index('<', @offset_end)

        return nil unless @offset_start

        # @text_start is the first character not yet consumed by a tag, so
        # text in front of the very first tag is reported in full.
        if @offset_start > @text_start
          collector.add_text @xml[@text_start...@offset_start]
        end

        @offset_end = tag_end(@offset_start)

        return nil unless @offset_end

        @text_start = @offset_end + 1
        element_text = @xml[@offset_start..@offset_end]
        marker = element_text[1, 1]

        next if marker == '?' || marker == '!'

        if marker == '/'
          collector.end_element
        else
          collector.start_element element_text
          collector.end_element if element_text[-2, 1] == '/'
        end
      end
    end

    # True once a scan has run off the end of the string.
    def eof?
      @offset_start.nil? || @offset_end.nil?
    end

    private

    # Index of the character that closes the tag opening at +tag_start+, or
    # nil when it is unterminated. Comments end at "-->" so that a '>' inside
    # the comment body does not cut them short.
    def tag_end(tag_start)
      if @xml[tag_start, 4] == '<!--'
        terminator = @xml.index('-->', tag_start + 4)
        terminator && terminator + 2
      else
        @xml.index('>', tag_start)
      end
    end

  end

end
@@ -0,0 +1,55 @@
1
+ require 'pseudo_xml/xml_string'
2
+ require 'pseudo_xml/collector'
3
+ require 'pseudo_xml/simple_query'
4
+
5
module PseudoXml
  # Streaming search for elements located at a slash-separated path such as
  # "/feed/entry". A step of "*" accepts any element name at that depth.
  # Each match is built together with its descendants and handed to a block
  # when its closing tag is seen.
  #
  # Note: the earlier `protected :initialize` line was dropped. It ran before
  # this class defined #initialize, and Ruby makes #initialize private anyway,
  # so it had no effect.
  class XPathQuery < Collector

    # Characters accepted in a tag name ('-', ':' and '.' included so that
    # names like "feed-list" are compared whole).
    ELEMENT_NAME = /[\w:.\-]+/

    # Scans +xml+ for elements matching +xpath+.
    #
    # With a block, each match is yielded and nil is returned.
    # Without a block, the matches are returned as an Array.
    def self.find(xml, xpath, &b)
      matches = []
      handler = b || proc { |element| matches << element }

      XmlString.new(xml).parse(new(xpath, &handler))

      b ? nil : matches
    end

    def initialize(xpath, &b)
      @parsed_elements = []
      @current_path = []
      @xpath = xpath.split('/')
      # A leading "/" produces an empty first step; discard it.
      @xpath.shift if @xpath.first == ''
      @block = b
    end

    # Records the tag on the current path, then collects it when it lies
    # inside an open match or is itself a match.
    def start_element(element_text)
      element_name = element_text[ELEMENT_NAME].to_s

      @current_path.push element_name

      if !@parsed_elements.empty? || xpath_match?
        @parsed_elements.push Element.new(element_name)
      end
    end

    # True when the current path is at least as deep as the query and each
    # query step equals the path component at the same depth (or is "*").
    # The depth check stops a "*" step from matching a component that does
    # not exist yet, which previously made "/a/*" match <a> itself.
    def xpath_match?
      return false if @current_path.size < @xpath.size

      @xpath.each_with_index do |step, i|
        return false unless step == '*' || step == @current_path[i]
      end

      true
    end

    # Leaves the current path level, then closes the innermost collected
    # element: nested ones are attached to their parent, a finished top-level
    # match is passed to the block.
    def end_element
      @current_path.pop

      if @parsed_elements.size > 1
        node = @parsed_elements.pop
        @parsed_elements.last.elements << node
      elsif @parsed_elements.size == 1
        match = @parsed_elements.pop
        @block.call(match) if @block
      end
    end
  end
end
metadata ADDED
@@ -0,0 +1,56 @@
1
+ --- !ruby/object:Gem::Specification
2
+ rubygems_version: 0.8.10
3
+ specification_version: 1
4
+ name: pseudoxml
5
+ version: !ruby/object:Gem::Version
6
+ version: 0.1.0
7
+ date: 2006-02-01
8
+ summary: "PseudoXml -- A not-quite Xml parser in pure-ruby"
9
+ require_paths:
10
+ - lib
11
+ email: ssmoot@gmail.com; bauer.mail@gmail.com
12
+ homepage: http://substantiality.net
13
+ rubyforge_project: pseudoxml
14
+ description: "Fast, low memory requirements for the streaming version, but no CDATA or
15
+ Attribute support"
16
+ autorequire: rexchange
17
+ default_executable:
18
+ bindir: bin
19
+ has_rdoc: true
20
+ required_ruby_version: !ruby/object:Gem::Version::Requirement
21
+ requirements:
22
+ -
23
+ - ">"
24
+ - !ruby/object:Gem::Version
25
+ version: 0.0.0
26
+ version:
27
+ platform: ruby
28
+ authors:
29
+ - Sam Smoot
30
+ - Scott Bauer
31
+ files:
32
+ - README
33
+ - CHANGELOG
34
+ - RAKEFILE
35
+ - lib/pseudo_xml.rb
36
+ - lib/pseudo_xml/collector.rb
37
+ - lib/pseudo_xml/document.rb
38
+ - lib/pseudo_xml/element.rb
39
+ - lib/pseudo_xml/simple_query.rb
40
+ - lib/pseudo_xml/xml_string.rb
41
+ - lib/pseudo_xml/xpath_query.rb
42
+ test_files: []
43
+ rdoc_options:
44
+ - "--line-numbers"
45
+ - "--inline-source"
46
+ - "--main"
47
+ - README
48
+ extra_rdoc_files:
49
+ - README
50
+ - CHANGELOG
51
+ - RAKEFILE
52
+ executables: []
53
+ extensions: []
54
+ requirements:
55
+ - none
56
+ dependencies: []