pseudoxml 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +0 -0
- data/RAKEFILE +66 -0
- data/README +0 -0
- data/lib/pseudo_xml.rb +5 -0
- data/lib/pseudo_xml/collector.rb +30 -0
- data/lib/pseudo_xml/document.rb +26 -0
- data/lib/pseudo_xml/element.rb +37 -0
- data/lib/pseudo_xml/simple_query.rb +39 -0
- data/lib/pseudo_xml/xml_string.rb +51 -0
- data/lib/pseudo_xml/xpath_query.rb +55 -0
- metadata +56 -0
data/CHANGELOG
ADDED
File without changes
|
data/RAKEFILE
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'rake'
|
5
|
+
require 'rake/rdoctask'
|
6
|
+
require 'rake/gempackagetask'
|
7
|
+
require 'rake/contrib/rubyforgepublisher'
|
8
|
+
require 'pscp'
|
9
|
+
|
10
|
+
# Version string stamped into the generated gem.
PACKAGE_VERSION = '0.1.0'

# Files shipped in the gem and fed to RDoc.
PACKAGE_FILES = FileList[
  'README',
  'CHANGELOG',
  'RAKEFILE',
  'lib/**/*.rb'
].to_a

PROJECT = 'pseudoxml'

# Publishing defaults. `||=` keeps any value the caller already exported
# instead of silently overwriting it.
ENV['RUBYFORGE_USER'] ||= "ssmoot@rubyforge.org"
ENV['RUBYFORGE_PROJECT'] ||= "/var/www/gforge-projects/#{PROJECT}"

task :default => [:rdoc]

desc 'Generate Documentation'
rd = Rake::RDocTask.new do |rdoc|
  rdoc.rdoc_dir = 'doc'
  rdoc.title = "PseudoXml -- A not-quite Xml parser in pure-ruby"
  rdoc.options << '--line-numbers' << '--inline-source' << '--main' << 'README'
  rdoc.rdoc_files.include(PACKAGE_FILES)
end

gem_spec = Gem::Specification.new do |s|
  s.platform = Gem::Platform::RUBY
  s.name = PROJECT
  s.summary = "PseudoXml -- A not-quite Xml parser in pure-ruby"
  s.description = "Fast, low memory requirements for the streaming version, but no CDATA or Attribute support"
  s.version = PACKAGE_VERSION

  s.authors = 'Sam Smoot', 'Scott Bauer'
  s.email = 'ssmoot@gmail.com; bauer.mail@gmail.com'
  s.rubyforge_project = PROJECT
  s.homepage = 'http://substantiality.net'

  s.files = PACKAGE_FILES

  s.require_path = 'lib'
  s.requirements << 'none'
  # Must name a file that exists under require_path; this gem's entry point
  # is lib/pseudo_xml.rb (the previous value, 'rexchange', is not shipped here).
  s.autorequire = 'pseudo_xml'

  s.has_rdoc = true
  s.rdoc_options << '--line-numbers' << '--inline-source' << '--main' << 'README'
  # Everything RDoc processes that is not Ruby source (README, CHANGELOG, ...).
  s.extra_rdoc_files = rd.rdoc_files.reject { |fn| fn =~ /\.rb$/ }.to_a
end

# The spec is handed to the constructor, so it does not need to be assigned
# again inside the block.
Rake::GemPackageTask.new(gem_spec) do |p|
  p.need_tar = true
  p.need_zip = true
end

desc "Publish RDOC to RubyForge"
task :rubyforge => [:rdoc, :gem] do
  Rake::SshDirPublisher.new(ENV['RUBYFORGE_USER'], ENV['RUBYFORGE_PROJECT'], 'doc').upload
end
|
data/README
ADDED
File without changes
|
data/lib/pseudo_xml.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
module PseudoXml

  # Sink for parser callbacks (start_element / end_element / add_text) that
  # assembles Element objects into a tree, using @parsed_elements as a stack
  # of the elements that are currently open.
  class Collector

    def initialize
      @parsed_elements = []
    end

    # Opens a new element. The element name is the first run of word
    # characters found in the raw tag text.
    def start_element(element_text)
      tag_name = element_text[/\w+/]
      @parsed_elements.push Element.new(tag_name)
    end

    # Closes the innermost open element and attaches it to its parent.
    # The outermost element is never popped, so it stays available
    # through #nodes once parsing is done.
    def end_element
      return unless @parsed_elements.size > 1

      finished = @parsed_elements.pop
      @parsed_elements.last.elements << finished
    end

    # Appends text to the innermost open element. Empty text, and text seen
    # while no element is open, is dropped.
    def add_text(text)
      return unless text.size > 0 && @parsed_elements.size > 0

      @parsed_elements.last.text_nodes << text
    end

    # The stack of open elements.
    def nodes
      @parsed_elements
    end
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'pseudo_xml/xml_string'
|
2
|
+
require 'pseudo_xml/collector'
|
3
|
+
require 'pseudo_xml/element'
|
4
|
+
|
5
|
+
module PseudoXml
  # Parses a complete XML string eagerly and exposes the resulting tree.
  class Document

    # The first node left in the collector after parsing, or nil when the
    # input produced no elements.
    attr_reader :root

    # Runs an XmlString parser over +xml+, feeding a fresh Collector.
    def initialize(xml)
      tree_builder = Collector.new
      XmlString.new(xml).parse(tree_builder)

      @root = tree_builder.nodes.first
    end

    # Number reported by the root's #count, or 0 when there is no root.
    def count_elements
      @root ? @root.count : 0
    end
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module PseudoXml

  # A named tree node holding child elements and text fragments.
  class Element

    include Enumerable

    attr_reader :name

    # +name+ is stored with surrounding whitespace removed.
    def initialize(name)
      @name = name.strip
    end

    # Text fragments collected for this element (created on first access).
    def text_nodes
      @text_nodes ||= []
    end

    # All text fragments joined with the input record separator ($/).
    def text
      text_nodes.join($/)
    end

    # Direct child elements (created on first access).
    def elements
      @elements ||= []
    end

    # Yields each direct child; this is what Enumerable builds on.
    def each
      elements.each do |child|
        yield child
      end
    end

    def to_s
      name
    end

    # Size of the subtree rooted here: this element plus all descendants.
    # Note that this replaces Enumerable#count and takes no arguments.
    def count
      total = 1
      elements.each { |child| total += child.count }
      total
    end
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
require 'pseudo_xml/xml_string'
|
2
|
+
require 'pseudo_xml/collector'
|
3
|
+
|
4
|
+
module PseudoXml
  # Streaming query: hands every element whose tag name equals a given name
  # (together with its collected subtree) to a block as soon as that element
  # is closed.
  class SimpleQuery < Collector

    # NOTE(review): #initialize is redefined below, and Ruby makes every
    # `initialize` private on definition, so this call appears to have no
    # lasting effect. Kept to avoid changing the class surface.
    protected :initialize

    # Parses +xml+ and calls the block once per matching element.
    # Returns whatever XmlString#parse returns.
    def self.find(xml, element_name, &b)
      parser = XmlString.new(xml)
      collector = self.new(element_name, &b)

      parser.parse(collector)
    end

    # element_name:: tag name to look for
    # b:: block invoked with each completed matching element
    def initialize(element_name, &b)
      super()
      @find_element_name = element_name
      @block = b
    end

    # Starts collecting when the tag matches, and keeps collecting every
    # descendant while a match is open. The previous guard (`size > 1`)
    # ignored the children of a matched element, so the first child's closing
    # tag was taken for the match's own and fired the block early.
    def start_element(element_text)
      element_name = element_text[/\w+/]

      if !@parsed_elements.empty? || element_name == @find_element_name
        @parsed_elements.push Element.new(element_name)
      end
    end

    # Closing a nested element attaches it to its parent; closing the
    # outermost collected element hands it to the block. Closing tags seen
    # while nothing is being collected are ignored.
    def end_element
      if @parsed_elements.size > 1
        node = @parsed_elements.pop
        @parsed_elements.last.elements << node
      elsif @parsed_elements.size == 1
        @block.call(@parsed_elements.pop)
      end
    end
  end
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
module PseudoXml

  # Minimal tag scanner over an in-memory string. It reports what it finds to
  # a collector object responding to start_element(text), end_element and
  # add_text(text). Attributes are not interpreted; the raw tag text is
  # passed through.
  class XmlString
    def initialize(xml)
      @xml = xml
      reset!
    end

    # Rewinds the scanner to just past the prolog.
    def reset!
      @offset_start = @offset_end = skip_prolog
    end

    # Length of a leading <?...?> prolog plus the newlines that follow it, or
    # 0 when the string does not begin with one. Anchored with \A: `^` matches
    # at the start of any line, so a processing instruction further down the
    # document used to be mistaken for the prolog.
    def skip_prolog
      if @xml =~ /\A(\<\?.*?\?\>(\r?\n)*)/
        $1.size
      else
        0
      end
    end

    # Scans the remaining input, emitting callbacks on +collector+.
    # Always returns nil.
    #
    # * text between tags              -> add_text
    # * <name ...>                     -> start_element
    # * </name>                        -> end_element
    # * <name ... />                   -> start_element, then end_element
    # * <!-- ... -->, <!...>, <?...?>  -> skipped
    def parse(collector)
      # Index of the first character not yet consumed. After reset! this is
      # the first character after the prolog.
      text_start = @offset_end

      until eof?
        @offset_start = @xml.index('<', text_start)
        return nil unless @offset_start

        if @offset_start > text_start
          collector.add_text @xml[text_start...@offset_start]
        end

        @offset_end = markup_end(@offset_start)
        return nil unless @offset_end

        text_start = @offset_end + 1
        element_text = @xml[@offset_start..@offset_end]

        case element_text[1, 1]
        when '/'
          collector.end_element
        when '!', '?'
          # Comment, declaration or processing instruction: not an element.
        else
          collector.start_element element_text
          # A self-closing tag has no separate closing tag, so close it here
          # to keep the collector's open-element stack balanced.
          collector.end_element if element_text[-2, 2] == '/>'
        end
      end
    end

    # True once a search for '<' or the end of a tag has failed.
    def eof?
      @offset_start.nil? || @offset_end.nil?
    end

    private

    # Index of the last character of the markup starting at +from+, or nil if
    # it is unterminated. Comments end at '-->' so that a '>' inside the
    # comment body does not cut them short.
    def markup_end(from)
      if @xml[from, 4] == '<!--'
        close = @xml.index('-->', from + 4)
        close && close + 2
      else
        @xml.index('>', from)
      end
    end

  end

end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'pseudo_xml/xml_string'
|
2
|
+
require 'pseudo_xml/collector'
|
3
|
+
require 'pseudo_xml/simple_query'
|
4
|
+
|
5
|
+
module PseudoXml
  # Streaming query: hands every element located at a slash-separated path
  # (with '*' standing for any single name) to a block as soon as that
  # element is closed, together with its collected subtree.
  class XPathQuery < Collector

    # NOTE(review): #initialize is redefined below, and Ruby makes every
    # `initialize` private on definition, so this call appears to have no
    # lasting effect. Kept to avoid changing the class surface.
    protected :initialize

    # Parses +xml+ and calls the block once per element matching +xpath+.
    # Returns whatever XmlString#parse returns.
    def self.find(xml, xpath, &b)
      parser = XmlString.new(xml)
      collector = self.new(xpath, &b)

      parser.parse(collector)
    end

    # xpath:: path such as "/root/item" or "/root/*"; a leading slash is
    #         optional
    # b:: block invoked with each completed matching element
    def initialize(xpath, &b)
      super()
      @current_path = []
      @xpath = xpath.split('/')
      @xpath.shift if @xpath.first == ''
      @block = b
    end

    # Tracks the path of open tags and starts (or continues) collecting when
    # the current path matches, or when a match is already being collected.
    def start_element(element_text)
      element_name = element_text[/\w+/]

      @current_path.push element_name

      if !@parsed_elements.empty? || xpath_match?
        @parsed_elements.push Element.new(element_name)
      end
    end

    # True when the open-tag path begins with the query path. A current path
    # shorter than the query can never match; without that check a trailing
    # '*' was satisfied by a missing step, so "/a/*" matched <a> itself.
    def xpath_match?
      return false if @current_path.size < @xpath.size

      @xpath.each_with_index do |step, i|
        return false unless step == '*' || step == @current_path[i]
      end

      true
    end

    # Closing a nested element attaches it to its parent; closing the
    # outermost collected element hands it to the block.
    def end_element
      @current_path.pop

      if @parsed_elements.size > 1
        node = @parsed_elements.pop
        @parsed_elements.last.elements << node
      elsif @parsed_elements.size == 1
        @block.call(@parsed_elements.pop)
      end
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
rubygems_version: 0.8.10
|
3
|
+
specification_version: 1
|
4
|
+
name: pseudoxml
|
5
|
+
version: !ruby/object:Gem::Version
|
6
|
+
version: 0.1.0
|
7
|
+
date: 2006-02-01
|
8
|
+
summary: "PseudoXml -- A not-quite Xml parser in pure-ruby"
|
9
|
+
require_paths:
|
10
|
+
- lib
|
11
|
+
email: ssmoot@gmail.com; bauer.mail@gmail.com
|
12
|
+
homepage: http://substantiality.net
|
13
|
+
rubyforge_project: pseudoxml
|
14
|
+
description: "Fast, low memory requirements for the streaming version, but no CDATA or
|
15
|
+
Attribute support"
|
16
|
+
autorequire: rexchange
|
17
|
+
default_executable:
|
18
|
+
bindir: bin
|
19
|
+
has_rdoc: true
|
20
|
+
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
21
|
+
requirements:
|
22
|
+
-
|
23
|
+
- ">"
|
24
|
+
- !ruby/object:Gem::Version
|
25
|
+
version: 0.0.0
|
26
|
+
version:
|
27
|
+
platform: ruby
|
28
|
+
authors:
|
29
|
+
- Sam Smoot
|
30
|
+
- Scott Bauer
|
31
|
+
files:
|
32
|
+
- README
|
33
|
+
- CHANGELOG
|
34
|
+
- RAKEFILE
|
35
|
+
- lib/pseudo_xml.rb
|
36
|
+
- lib/pseudo_xml/collector.rb
|
37
|
+
- lib/pseudo_xml/document.rb
|
38
|
+
- lib/pseudo_xml/element.rb
|
39
|
+
- lib/pseudo_xml/simple_query.rb
|
40
|
+
- lib/pseudo_xml/xml_string.rb
|
41
|
+
- lib/pseudo_xml/xpath_query.rb
|
42
|
+
test_files: []
|
43
|
+
rdoc_options:
|
44
|
+
- "--line-numbers"
|
45
|
+
- "--inline-source"
|
46
|
+
- "--main"
|
47
|
+
- README
|
48
|
+
extra_rdoc_files:
|
49
|
+
- README
|
50
|
+
- CHANGELOG
|
51
|
+
- RAKEFILE
|
52
|
+
executables: []
|
53
|
+
extensions: []
|
54
|
+
requirements:
|
55
|
+
- none
|
56
|
+
dependencies: []
|