pseudoxml 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +0 -0
- data/RAKEFILE +66 -0
- data/README +0 -0
- data/lib/pseudo_xml.rb +5 -0
- data/lib/pseudo_xml/collector.rb +30 -0
- data/lib/pseudo_xml/document.rb +26 -0
- data/lib/pseudo_xml/element.rb +37 -0
- data/lib/pseudo_xml/simple_query.rb +39 -0
- data/lib/pseudo_xml/xml_string.rb +51 -0
- data/lib/pseudo_xml/xpath_query.rb +55 -0
- metadata +56 -0
data/CHANGELOG
ADDED
File without changes
|
data/RAKEFILE
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Build script for the pseudoxml gem: generates RDoc, packages the gem
# (plus tar and zip archives) and publishes the documentation to RubyForge.

require 'rubygems'
require 'rake'
require 'rake/rdoctask'
require 'rake/gempackagetask'
require 'rake/contrib/rubyforgepublisher'
require 'pscp'

PACKAGE_VERSION = '0.1.0'

# Every file that ships in the gem and is fed to RDoc.
PACKAGE_FILES = FileList[
  'README',
  'CHANGELOG',
  'RAKEFILE',
  'lib/**/*.rb'
].to_a

PROJECT = 'pseudoxml'

# Account and remote directory handed to the SSH publisher in :rubyforge.
ENV['RUBYFORGE_USER'] = "ssmoot@rubyforge.org"
ENV['RUBYFORGE_PROJECT'] = "/var/www/gforge-projects/#{PROJECT}"

task :default => [:rdoc]

desc 'Generate Documentation'
rd = Rake::RDocTask.new do |rdoc|
  rdoc.rdoc_dir = 'doc'
  rdoc.title = "PseudoXml -- A not-quite Xml parser in pure-ruby"
  rdoc.options << '--line-numbers' << '--inline-source' << '--main' << 'README'
  rdoc.rdoc_files.include(PACKAGE_FILES)
end

gem_spec = Gem::Specification.new do |s|
  s.platform = Gem::Platform::RUBY
  s.name = PROJECT
  s.summary = "PseudoXml -- A not-quite Xml parser in pure-ruby"
  s.description = "Fast, low memory requirements for the streaming version, but no CDATA or Attribute support"
  s.version = PACKAGE_VERSION

  s.authors = 'Sam Smoot', 'Scott Bauer'
  s.email = 'ssmoot@gmail.com; bauer.mail@gmail.com'
  s.rubyforge_project = PROJECT
  s.homepage = 'http://substantiality.net'

  s.files = PACKAGE_FILES

  s.require_path = 'lib'
  s.requirements << 'none'
  # Was 'rexchange', which names no file in this package; the library's
  # entry file under the require path is lib/pseudo_xml.rb.
  s.autorequire = 'pseudo_xml'

  s.has_rdoc = true
  s.rdoc_options << '--line-numbers' << '--inline-source' << '--main' << 'README'
  # Only the non-Ruby files (README, CHANGELOG, RAKEFILE) count as "extra".
  s.extra_rdoc_files = rd.rdoc_files.reject { |fn| fn =~ /\.rb$/ }.to_a
end

Rake::GemPackageTask.new(gem_spec) do |p|
  p.gem_spec = gem_spec
  p.need_tar = true
  p.need_zip = true
end

desc "Publish RDOC to RubyForge"
task :rubyforge => [:rdoc, :gem] do
  Rake::SshDirPublisher.new(ENV['RUBYFORGE_USER'], ENV['RUBYFORGE_PROJECT'], 'doc').upload
end
|
data/README
ADDED
File without changes
|
data/lib/pseudo_xml.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
module PseudoXml

  # Receives parse events and assembles them into a tree of Element objects.
  #
  # Open elements are kept on a stack (@parsed_elements). Closing an element
  # pops it and attaches it to the element beneath it; the bottom-most
  # element is never popped, so it remains available through #nodes.
  class Collector

    def initialize
      @parsed_elements = []
    end

    # Pushes a new Element named after the first run of word characters
    # found in +element_text+.
    def start_element(element_text)
      tag_name = element_text[/\w+/]
      @parsed_elements.push(Element.new(tag_name))
    end

    # Pops the innermost open element and appends it to its parent.
    # Does nothing when fewer than two elements are open.
    def end_element
      return unless @parsed_elements.size > 1

      finished = @parsed_elements.pop
      @parsed_elements.last.elements << finished
    end

    # Appends +text+ to the innermost open element. Empty text, and text
    # arriving while no element is open, is dropped.
    def add_text(text)
      return if text.size.zero? || @parsed_elements.size.zero?

      @parsed_elements.last.text_nodes << text
    end

    # The stack of elements that are still open.
    def nodes
      @parsed_elements
    end
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'pseudo_xml/xml_string'
|
2
|
+
require 'pseudo_xml/collector'
|
3
|
+
require 'pseudo_xml/element'
|
4
|
+
|
5
|
+
module PseudoXml

  # Parses a whole XML string up front and exposes the resulting tree.
  class Document

    # The first node left in the collector after parsing, or nil when the
    # collector ended up empty.
    attr_reader :root

    # xml:: the String handed to XmlString for scanning.
    def initialize(xml)
      collector = Collector.new
      XmlString.new(xml).parse(collector)
      @root = collector.nodes.first
    end

    # Returns whatever the root reports from #count, or 0 without a root.
    def count_elements
      @root ? @root.count : 0
    end
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module PseudoXml

  # One node of the parsed tree: a name, the text fragments collected for
  # it, and its child elements. Enumerable iterates the direct children.
  class Element
    include Enumerable

    attr_reader :name

    # name:: the element name; surrounding whitespace is stripped.
    def initialize(name)
      @name = name.strip
    end

    # Text fragments added to this element, created lazily.
    def text_nodes
      @text_nodes ||= []
    end

    # All text fragments joined with the input record separator ($/).
    def text
      text_nodes.join($/)
    end

    # Direct child elements, created lazily.
    def elements
      @elements ||= []
    end

    # Yields each direct child. Without a block an Enumerator is returned
    # (previously this raised LocalJumpError), matching the usual contract
    # of #each on Enumerable classes.
    def each(&block)
      return enum_for(:each) unless block_given?

      elements.each(&block)
    end

    def to_s
      @name
    end

    # Number of elements in this subtree, counting this element itself.
    # Note that this replaces Enumerable#count and takes no arguments.
    def count
      elements.inject(1) { |sum, node| sum + node.count }
    end
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
require 'pseudo_xml/xml_string'
|
2
|
+
require 'pseudo_xml/collector'
|
3
|
+
|
4
|
+
module PseudoXml

  # Streaming search by element name.
  #
  # While scanning, every element whose name equals the requested name is
  # collected together with everything nested inside it, then handed to the
  # caller's block as soon as its closing tag is seen.
  class SimpleQuery < Collector

    protected :initialize

    # Scans +xml+ and calls the block once for each element named
    # +element_name+. Returns the result of XmlString#parse.
    def self.find(xml, element_name, &b)
      parser = XmlString.new(xml)
      collector = self.new(element_name, &b)

      parser.parse(collector)
    end

    # element_name:: name of the elements to report.
    # b:: block that receives each completed Element.
    def initialize(element_name, &b)
      @parsed_elements = []
      @find_element_name = element_name
      @block = b
    end

    # Starts collecting when the name matches, and keeps collecting every
    # element opened while a match is in progress.
    def start_element(element_text)
      element_name = element_text[/\w+/]

      # The stack must only be non-empty here. The earlier "size > 1" test
      # skipped the direct children of a match, so the first child's closing
      # tag reported the match too early and without its children.
      if !@parsed_elements.empty? || element_name == @find_element_name
        @parsed_elements.push Element.new(element_name)
      end
    end

    # Attaches a finished nested element to its parent, or reports the
    # finished match to the block when it is the only element on the stack.
    def end_element
      if @parsed_elements.size > 1
        node = @parsed_elements.pop
        @parsed_elements.last.elements << node
      elsif @parsed_elements.size == 1
        @block.call(@parsed_elements.pop)
      end
    end
  end
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
module PseudoXml

  # Minimal tag scanner over an in-memory XML string.
  #
  # Walks the string left to right looking for '<' ... '>' pairs and reports
  # what it finds to a collector, which must respond to
  # +start_element(element_text)+, +end_element+ and +add_text(text)+.
  class XmlString

    # xml:: the String to scan.
    def initialize(xml)
      @xml = xml
      reset!
    end

    # Rewinds both scan offsets to the first character after the prolog.
    def reset!
      @offset_start = @offset_end = skip_prolog
    end

    # Returns the length of a leading <?...?> declaration, including the
    # line breaks that follow it, or 0 when the string does not begin with
    # one.
    def skip_prolog
      # Anchored with \A, not ^: ^ also matches after any newline, so a
      # <?...?> at the start of a later line was taken for the prolog and its
      # length was then used as an offset from the start of the string.
      if @xml =~ /\A(\<\?.*?\?\>(\r?\n)*)/
        $1.size
      else
        0
      end
    end

    # Scans the rest of the string, sending events to +collector+.
    # Always returns nil; scanning stops at the first missing '<' or '>'.
    # Call #reset! before parsing the same instance again.
    def parse(collector)
      until eof?
        @offset_start = @xml.index('<', @offset_end)

        return nil unless @offset_start

        # Anything between the previous '>' and this '<' is text.
        if @offset_start > @offset_end + 1
          collector.add_text @xml[@offset_end + 1...@offset_start]
        end

        @offset_end = @xml.index('>', @offset_start)

        return nil unless @offset_end

        element_text = @xml[@offset_start..@offset_end]

        if element_text[1, 1] == '/'
          collector.end_element
        else
          collector.start_element element_text
          # A self-closing tag (<br/>) has no separate closing tag, so close
          # it here; otherwise it stays open and swallows its siblings.
          collector.end_element if element_text[-2, 2] == '/>'
        end
      end
    end

    # True once a scan has failed to find a further '<' or '>'.
    def eof?
      @offset_start.nil? || @offset_end.nil?
    end

  end

end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'pseudo_xml/xml_string'
|
2
|
+
require 'pseudo_xml/collector'
|
3
|
+
require 'pseudo_xml/simple_query'
|
4
|
+
|
5
|
+
module PseudoXml

  # Streaming search by a simple slash-separated path.
  #
  # The path is split on '/', with a leading empty segment dropped, and
  # compared position by position against the names of the elements
  # currently open. A '*' segment matches any name. Each matching element
  # is collected with everything nested inside it and handed to the
  # caller's block when its closing tag is seen.
  class XPathQuery < Collector

    protected :initialize

    # Scans +xml+ and calls the block once for each element matching
    # +xpath+. Returns the result of XmlString#parse.
    def self.find(xml, xpath, &b)
      parser = XmlString.new(xml)
      collector = self.new(xpath, &b)

      parser.parse(collector)
    end

    # xpath:: path such as "/root/item" or "/root/*".
    # b:: block that receives each completed Element.
    def initialize(xpath, &b)
      @parsed_elements = []
      @current_path = []
      @xpath = xpath.split('/')
      @xpath.shift if @xpath.first == ''
      @block = b
    end

    # Records the element on the current path, then starts collecting when
    # the path matches or keeps collecting while a match is in progress.
    def start_element(element_text)
      element_name = element_text[/\w+/]

      @current_path.push element_name

      # The stack must only be non-empty here. The earlier "size > 1" test
      # skipped the direct children of a match, so the first child's closing
      # tag reported the match too early and without its children.
      if !@parsed_elements.empty? || xpath_match?
        @parsed_elements.push Element.new(element_name)
      end
    end

    # True when every path segment equals the open element name at the same
    # depth, or is '*'.
    def xpath_match?
      # A path shallower than the query cannot match. Without this check a
      # trailing '*' matched the missing (nil) entry, so "/a/*" matched <a>.
      return false if @current_path.size < @xpath.size

      @xpath.each_with_index do |segment, i|
        return false unless segment == '*' || segment == @current_path[i]
      end

      true
    end

    # Leaves the current path level, then attaches a finished nested element
    # to its parent or reports the finished match to the block.
    def end_element
      @current_path.pop

      if @parsed_elements.size > 1
        node = @parsed_elements.pop
        @parsed_elements.last.elements << node
      elsif @parsed_elements.size == 1
        @block.call(@parsed_elements.pop)
      end
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
rubygems_version: 0.8.10
|
3
|
+
specification_version: 1
|
4
|
+
name: pseudoxml
|
5
|
+
version: !ruby/object:Gem::Version
|
6
|
+
version: 0.1.0
|
7
|
+
date: 2006-02-01
|
8
|
+
summary: "PseudoXml -- A not-quite Xml parser in pure-ruby"
|
9
|
+
require_paths:
|
10
|
+
- lib
|
11
|
+
email: ssmoot@gmail.com; bauer.mail@gmail.com
|
12
|
+
homepage: http://substantiality.net
|
13
|
+
rubyforge_project: pseudoxml
|
14
|
+
description: "Fast, low memory requirements for the streaming version, but no CDATA or
|
15
|
+
Attribute support"
|
16
|
+
autorequire: rexchange
|
17
|
+
default_executable:
|
18
|
+
bindir: bin
|
19
|
+
has_rdoc: true
|
20
|
+
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
21
|
+
requirements:
|
22
|
+
-
|
23
|
+
- ">"
|
24
|
+
- !ruby/object:Gem::Version
|
25
|
+
version: 0.0.0
|
26
|
+
version:
|
27
|
+
platform: ruby
|
28
|
+
authors:
|
29
|
+
- Sam Smoot
|
30
|
+
- Scott Bauer
|
31
|
+
files:
|
32
|
+
- README
|
33
|
+
- CHANGELOG
|
34
|
+
- RAKEFILE
|
35
|
+
- lib/pseudo_xml.rb
|
36
|
+
- lib/pseudo_xml/collector.rb
|
37
|
+
- lib/pseudo_xml/document.rb
|
38
|
+
- lib/pseudo_xml/element.rb
|
39
|
+
- lib/pseudo_xml/simple_query.rb
|
40
|
+
- lib/pseudo_xml/xml_string.rb
|
41
|
+
- lib/pseudo_xml/xpath_query.rb
|
42
|
+
test_files: []
|
43
|
+
rdoc_options:
|
44
|
+
- "--line-numbers"
|
45
|
+
- "--inline-source"
|
46
|
+
- "--main"
|
47
|
+
- README
|
48
|
+
extra_rdoc_files:
|
49
|
+
- README
|
50
|
+
- CHANGELOG
|
51
|
+
- RAKEFILE
|
52
|
+
executables: []
|
53
|
+
extensions: []
|
54
|
+
requirements:
|
55
|
+
- none
|
56
|
+
dependencies: []
|