scxml 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +0 -0
- data/lib/scxml.rb +86 -0
- data/lib/scxml/document.rb +165 -0
- data/lib/scxml/element.rb +219 -0
- data/lib/scxml/xpath.rb +142 -0
- data/test/scxml/Xalan_Conformance_Tests/REF_OUT/predicate/predicate01.out +2 -0
- data/test/scxml/Xalan_Conformance_Tests/REF_OUT/predicate/predicate02.out +2 -0
- data/test/scxml/Xalan_Conformance_Tests/REF_OUT/predicate/predicate03.out +2 -0
- data/test/scxml/Xalan_Conformance_Tests/REF_OUT/predicate/predicate04.out +2 -0
- data/test/scxml/Xalan_Conformance_Tests/REF_OUT/predicate/predicate12.out +4 -0
- data/test/scxml/Xalan_Conformance_Tests/REF_OUT/predicate/predicate13.out +2 -0
- data/test/scxml/Xalan_Conformance_Tests/REF_OUT/predicate/predicate41.out +2 -0
- data/test/scxml/Xalan_Conformance_Tests/REF_OUT/predicate/predicate42.out +2 -0
- data/test/scxml/Xalan_Conformance_Tests/REF_OUT/predicate/predicate45.out +2 -0
- data/test/scxml/Xalan_Conformance_Tests/REF_OUT/predicate/predicate51.out +2 -0
- data/test/scxml/Xalan_Conformance_Tests/REF_OUT/predicate/predicate52.out +2 -0
- data/test/scxml/Xalan_Conformance_Tests/REF_OUT/predicate/predicate56.out +2 -0
- data/test/scxml/Xalan_Conformance_Tests/REF_OUT/predicate/predicate57.out +5 -0
- data/test/scxml/Xalan_Conformance_Tests/REF_OUT/predicate/predicate58.out +11 -0
- data/test/scxml/Xalan_Conformance_Tests/REF_OUT/select/select01.out +2 -0
- data/test/scxml/Xalan_Conformance_Tests/REF_OUT/select/select02.out +4 -0
- data/test/scxml/Xalan_Conformance_Tests/REF_OUT/select/select03.out +2 -0
- data/test/scxml/Xalan_Conformance_Tests/REF_OUT/select/select04.out +14 -0
- data/test/scxml/Xalan_Conformance_Tests/REF_OUT/select/select12.out +2 -0
- data/test/scxml/Xalan_Conformance_Tests/REF_OUT/select/select13.out +13 -0
- data/test/scxml/Xalan_Conformance_Tests/REF_OUT/select/select41.out +5 -0
- data/test/scxml/Xalan_Conformance_Tests/REF_OUT/select/select42.out +5 -0
- data/test/scxml/Xalan_Conformance_Tests/REF_OUT/select/select45.out +5 -0
- data/test/scxml/Xalan_Conformance_Tests/REF_OUT/select/select51.out +5 -0
- data/test/scxml/Xalan_Conformance_Tests/REF_OUT/select/select52.out +3 -0
- data/test/scxml/Xalan_Conformance_Tests/REF_OUT/select/select56.out +7 -0
- data/test/scxml/Xalan_Conformance_Tests/REF_OUT/select/select57.out +7 -0
- data/test/scxml/Xalan_Conformance_Tests/REF_OUT/select/select58.out +5 -0
- data/test/scxml/Xalan_Conformance_Tests/REF_OUT/select/select60.out +5 -0
- data/test/scxml/Xalan_Conformance_Tests/REF_OUT/select/select62.out +2 -0
- data/test/scxml/Xalan_Conformance_Tests/REF_OUT/select/select63.out +2 -0
- data/test/scxml/Xalan_Conformance_Tests/REF_OUT/select/select76.out +2 -0
- data/test/scxml/Xalan_Conformance_Tests/REF_OUT/select/select81.out +10 -0
- data/test/scxml/Xalan_Conformance_Tests/REF_OUT/select/select82.out +2 -0
- data/test/scxml/Xalan_Conformance_Tests/REF_OUT/select/select83.out +7 -0
- data/test/scxml/Xalan_Conformance_Tests/REF_OUT/select/select84.out +8 -0
- data/test/scxml/Xalan_Conformance_Tests/predicate/predicate01.txt +8 -0
- data/test/scxml/Xalan_Conformance_Tests/predicate/predicate01.xml +7 -0
- data/test/scxml/Xalan_Conformance_Tests/predicate/predicate01.xsl +18 -0
- data/test/scxml/Xalan_Conformance_Tests/predicate/predicate02.txt +8 -0
- data/test/scxml/Xalan_Conformance_Tests/predicate/predicate02.xml +7 -0
- data/test/scxml/Xalan_Conformance_Tests/predicate/predicate02.xsl +18 -0
- data/test/scxml/Xalan_Conformance_Tests/predicate/predicate04.txt +8 -0
- data/test/scxml/Xalan_Conformance_Tests/predicate/predicate04.xml +7 -0
- data/test/scxml/Xalan_Conformance_Tests/predicate/predicate04.xsl +18 -0
- data/test/scxml/Xalan_Conformance_Tests/predicate/predicate07.txt +9 -0
- data/test/scxml/Xalan_Conformance_Tests/predicate/predicate07.xml +7 -0
- data/test/scxml/Xalan_Conformance_Tests/predicate/predicate07.xsl +20 -0
- data/test/scxml/Xalan_Conformance_Tests/predicate/predicate08.txt +9 -0
- data/test/scxml/Xalan_Conformance_Tests/predicate/predicate08.xml +7 -0
- data/test/scxml/Xalan_Conformance_Tests/predicate/predicate08.xsl +20 -0
- data/test/scxml/Xalan_Conformance_Tests/predicate/predicate10.txt +8 -0
- data/test/scxml/Xalan_Conformance_Tests/predicate/predicate10.xml +7 -0
- data/test/scxml/Xalan_Conformance_Tests/predicate/predicate10.xsl +20 -0
- data/test/scxml/Xalan_Conformance_Tests/predicate/predicate14.txt +9 -0
- data/test/scxml/Xalan_Conformance_Tests/predicate/predicate14.xml +9 -0
- data/test/scxml/Xalan_Conformance_Tests/predicate/predicate14.xsl +20 -0
- data/test/scxml/Xalan_Conformance_Tests/predicate/predicate15.txt +9 -0
- data/test/scxml/Xalan_Conformance_Tests/predicate/predicate15.xml +9 -0
- data/test/scxml/Xalan_Conformance_Tests/predicate/predicate15.xsl +20 -0
- data/test/scxml/Xalan_Conformance_Tests/predicate/predicate16.txt +9 -0
- data/test/scxml/Xalan_Conformance_Tests/predicate/predicate16.xml +9 -0
- data/test/scxml/Xalan_Conformance_Tests/predicate/predicate16.xsl +20 -0
- data/test/scxml/Xalan_Conformance_Tests/predicate/predicate17.txt +9 -0
- data/test/scxml/Xalan_Conformance_Tests/predicate/predicate17.xml +11 -0
- data/test/scxml/Xalan_Conformance_Tests/predicate/predicate17.xsl +20 -0
- data/test/scxml/Xalan_Conformance_Tests/predicate/predicate36.txt +8 -0
- data/test/scxml/Xalan_Conformance_Tests/predicate/predicate36.xml +7 -0
- data/test/scxml/Xalan_Conformance_Tests/predicate/predicate36.xsl +20 -0
- data/test/scxml/Xalan_Conformance_Tests/predicate/predicate48.txt +8 -0
- data/test/scxml/Xalan_Conformance_Tests/predicate/predicate48.xml +21 -0
- data/test/scxml/Xalan_Conformance_Tests/predicate/predicate48.xsl +22 -0
- data/test/scxml/Xalan_Conformance_Tests/predicate/predicate49.txt +8 -0
- data/test/scxml/Xalan_Conformance_Tests/predicate/predicate49.xml +21 -0
- data/test/scxml/Xalan_Conformance_Tests/predicate/predicate49.xsl +22 -0
- data/test/scxml/Xalan_Conformance_Tests/predicate/predicate50.txt +8 -0
- data/test/scxml/Xalan_Conformance_Tests/predicate/predicate50.xml +21 -0
- data/test/scxml/Xalan_Conformance_Tests/predicate/predicate50.xsl +22 -0
- data/test/scxml/Xalan_Conformance_Tests/predicate/predicate51.txt +8 -0
- data/test/scxml/Xalan_Conformance_Tests/predicate/predicate51.xml +21 -0
- data/test/scxml/Xalan_Conformance_Tests/predicate/predicate51.xsl +22 -0
- data/test/scxml/Xalan_Conformance_Tests/predicate/predicate52.txt +8 -0
- data/test/scxml/Xalan_Conformance_Tests/predicate/predicate52.xml +21 -0
- data/test/scxml/Xalan_Conformance_Tests/predicate/predicate52.xsl +22 -0
- data/test/scxml/Xalan_Conformance_Tests/predicate/predicate53.txt +8 -0
- data/test/scxml/Xalan_Conformance_Tests/predicate/predicate53.xml +21 -0
- data/test/scxml/Xalan_Conformance_Tests/predicate/predicate53.xsl +22 -0
- data/test/scxml/Xalan_Conformance_Tests/predicate/predicate54.txt +7 -0
- data/test/scxml/Xalan_Conformance_Tests/predicate/predicate54.xml +21 -0
- data/test/scxml/Xalan_Conformance_Tests/predicate/predicate54.xsl +21 -0
- data/test/scxml/Xalan_Conformance_Tests/predicate/predicate55.txt +8 -0
- data/test/scxml/Xalan_Conformance_Tests/predicate/predicate55.xml +21 -0
- data/test/scxml/Xalan_Conformance_Tests/predicate/predicate55.xsl +22 -0
- data/test/scxml/Xalan_Conformance_Tests/predicate/predicate56.txt +8 -0
- data/test/scxml/Xalan_Conformance_Tests/predicate/predicate56.xml +21 -0
- data/test/scxml/Xalan_Conformance_Tests/predicate/predicate56.xsl +22 -0
- data/test/scxml/Xalan_Conformance_Tests/predicate/prepare.rb +34 -0
- data/test/scxml/Xalan_Conformance_Tests/select/prepare.rb +34 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select01.txt +7 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select01.xml +9 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select01.xsl +19 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select02.txt +7 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select02.xml +4 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select02.xsl +17 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select03.txt +8 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select03.xml +13 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select03.xsl +19 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select04.txt +12 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select04.xml +12 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select04.xsl +35 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select12.txt +7 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select12.xml +9 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select12.xsl +19 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select13.txt +13 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select13.xml +18 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select13.xsl +33 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select41.txt +7 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select41.xml +9 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select41.xsl +23 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select42.txt +8 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select42.xml +9 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select42.xsl +26 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select45.txt +7 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select45.xml +24 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select45.xsl +25 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select51.txt +7 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select51.xml +24 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select51.xsl +25 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select52.txt +6 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select52.xml +6 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select52.xsl +23 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select56.txt +7 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select56.xml +29 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select56.xsl +25 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select57.txt +7 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select57.xml +29 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select57.xsl +25 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select58.txt +7 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select58.xml +17 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select58.xsl +25 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select60.txt +6 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select60.xml +9 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select60.xsl +25 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select62.txt +6 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select62.xml +5 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select62.xsl +22 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select63.txt +7 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select63.xml +10 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select63.xsl +19 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select76.txt +7 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select76.xml +6 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select76.xsl +17 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select81.txt +9 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select81.xml +6 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select81.xsl +24 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select82.txt +9 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select82.xml +5 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select82.xsl +28 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select83.txt +10 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select83.xml +10 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select83.xsl +32 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select84.txt +11 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select84.xml +10 -0
- data/test/scxml/Xalan_Conformance_Tests/select/select84.xsl +33 -0
- data/test/scxml/document_test.rb +231 -0
- data/test/scxml/element_test.rb +552 -0
- data/test/scxml_test_helper.rb +73 -0
- data/test/scxml_test_suite.rb +2 -0
- metadata +241 -0
data/README
ADDED
|
File without changes
|
data/lib/scxml.rb
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
require 'scxml/xpath'
|
|
2
|
+
require 'scxml/document'
|
|
3
|
+
require 'scxml/element'
|
|
4
|
+
require 'strscan'
|
|
5
|
+
|
|
6
|
+
# = SCXML
|
|
7
|
+
# Fast, lightweight access to simple xml data.
|
|
8
|
+
#
|
|
9
|
+
# == Introduction
|
|
10
|
+
# REXML is a standard, pure-Ruby XML processing library. It comes in the standard library and
|
|
11
|
+
# as far as I can tell is quite complete. It also is painfully slow when loading large XML documents
|
|
12
|
+
# and produces enormous memory footprints.
|
|
13
|
+
#
|
|
14
|
+
# SCanningXML parses XML using a scanning approach that works very quickly and with a small memory
|
|
15
|
+
# footprint. SCXML can run in a lightweight mode that minimizes memory at the expense of access
|
|
16
|
+
# time.
|
|
17
|
+
#
|
|
18
|
+
# Currently SCXML does not support insertion/removal of elements, although it may in the future.
|
|
19
|
+
# SCXML does allow elements to be re-written with new attributes and content. For making XML
|
|
20
|
+
# documents, I generally use the Builder[http://builder.rubyforge.com] gem.
|
|
21
|
+
#
|
|
22
|
+
# Elements can be accessed using XPath. Currently XPath support is incomplete, as well as the type
|
|
23
|
+
# of elements and content that can be parsed. See the limitations section below.
|
|
24
|
+
#
|
|
25
|
+
# Copyright (c) 2007 Simon Chiang
|
|
26
|
+
# Version: 0.1
|
|
27
|
+
# Licence: MIT-Style
|
|
28
|
+
#
|
|
29
|
+
# == Usage
|
|
30
|
+
#
|
|
31
|
+
# require 'scxml'
|
|
32
|
+
#
|
|
33
|
+
# xml_string = %Q{
|
|
34
|
+
# <?xml version="1.0" encoding="UTF-8"?>
|
|
35
|
+
# <root>
|
|
36
|
+
# <elements>
|
|
37
|
+
# <element id="0">a</element>
|
|
38
|
+
# <element id="1">b</element>
|
|
39
|
+
# <element id="2">c</element>
|
|
40
|
+
# </elements>
|
|
41
|
+
# </root> }
|
|
42
|
+
#
|
|
43
|
+
# # create a new xml document
|
|
44
|
+
# doc = SCXML::Document.new(xml_string)
|
|
45
|
+
#
|
|
46
|
+
# # select all elements
|
|
47
|
+
# elements = doc.select('/root/elements/element')
|
|
48
|
+
#
|
|
49
|
+
# # select element id = 1
|
|
50
|
+
# e = doc.select("/root/elements/element[@id='1']")
|
|
51
|
+
# e.name # -> 'element'
|
|
52
|
+
# e.attribute('id') # -> '1'
|
|
53
|
+
# e.attributes # -> {'id' => '1'}
|
|
54
|
+
# e.content # -> 'b'
|
|
55
|
+
# e.rewrite(:attributes => {'id' => '10', 'new' => 'attr'}, :content => 'content') # -> <element id='10' new='attr'>content</element>
|
|
56
|
+
#
|
|
57
|
+
# # element creation sets the element to the first parsed node
|
|
58
|
+
# r = SCXML::Element.new(xml_string)
|
|
59
|
+
# r.name # -> 'root'
|
|
60
|
+
#
|
|
61
|
+
# # elements select relative to themselves
|
|
62
|
+
# e = r.select('elements')
|
|
63
|
+
# e.name # -> 'elements'
|
|
64
|
+
# elements = e.select('element')
|
|
65
|
+
# elements.length # -> 3
|
|
66
|
+
# elements.first.attribute('id') # -> '0'
|
|
67
|
+
#
|
|
68
|
+
# == Limitations
|
|
69
|
+
# SCXML does not support all types of xml nodes. At the moment only simple xml documents will be
|
|
70
|
+
# correctly parsed by SCXML. The 'Usage' example is about what SCXML can handle: elements with children
|
|
71
|
+
# or content, as well as attributes.
|
|
72
|
+
#
|
|
73
|
+
# Supported XPath expressions are likewise limited to simple paths and predicates:
|
|
74
|
+
#
|
|
75
|
+
# doc.select('/') # -> selects the document
|
|
76
|
+
# doc.select('/root') # -> selects the root element
|
|
77
|
+
# e = doc.select('/root/elements') # -> selects the elements node
|
|
78
|
+
# e.select('*') # -> selects all children of the elements node
|
|
79
|
+
# e.select('element') # -> selects all children of the elements node named element
|
|
80
|
+
# doc.select('//element') # -> selects all element nodes wherever they occur
|
|
81
|
+
# doc.select("//element[@id='1']") # -> selects all element nodes with attribute id='1' wherever they occur
|
|
82
|
+
#
|
|
83
|
+
# See the tests for specific expression support.
|
|
84
|
+
#
|
|
85
|
+
module SCXML
|
|
86
|
+
end
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
|
|
2
|
+
module SCXML
|
|
3
|
+
|
|
4
|
+
# SCXML Documents keep one central instance of an XML string. All elements
|
|
5
|
+
# in a document are tracked using ranges. For example:
|
|
6
|
+
#
|
|
7
|
+
# <?xml version="1.0"?>
|
|
8
|
+
# <doc>
|
|
9
|
+
# <a>1</a>
|
|
10
|
+
# </doc>
|
|
11
|
+
#
|
|
12
|
+
# Is tracked as:
|
|
13
|
+
# doc.string_range = 0...45
|
|
14
|
+
# doc.string = "<?xml version="1.0"?>\n<doc>\n <a>1</a>\n</doc>"
|
|
15
|
+
# doc.content_range = 24...45
|
|
16
|
+
# doc.content = "<doc>\n <a>1</a>\n</doc>"
|
|
17
|
+
#
|
|
18
|
+
# a.string_range = 30...-7
|
|
19
|
+
# a.string = "<a>1</a>"
|
|
20
|
+
# a.content_range = 3...-4
|
|
21
|
+
# a.content = "1"
|
|
22
|
+
#
|
|
23
|
+
# In lightweight mode, the string and content of elements is not stored internally but rather
|
|
24
|
+
# computed from the ranges and extracted from document. Access is therefore slower, but
|
|
25
|
+
# the memory footprint significantly less, especially in large documents. Content for the document
|
|
26
|
+
# is always computed, and never stored internally.
|
|
27
|
+
#
|
|
28
|
+
# Attributes are parsed into a hash on their first access. In lightweight mode, the hash is not
|
|
29
|
+
# stored internally. These attribute hashes account for most of the access/footprint difference.
|
|
30
|
+
#
|
|
31
|
+
# Note that ranges are exclusive of the last indexed character (ie '...' is used in the range rather than '..'),
|
|
32
|
+
# and that range end will ONLY be positive if the range extends to the end of the document.
|
|
33
|
+
#
|
|
34
|
+
# String ranges encompass the entire tag and are relative to the full document. Content ranges are relative
|
|
35
|
+
# to the string range, and indicate all content within the tag. Ergo:
|
|
36
|
+
# doc.string == doc.string
|
|
37
|
+
# doc.content == doc.string[doc.content_range]
|
|
38
|
+
# a.string == doc.string[a.string_range]
|
|
39
|
+
# a.content == a.string[a.content_range]
|
|
40
|
+
class Document
|
|
41
|
+
include XPath
|
|
42
|
+
|
|
43
|
+
attr_reader :root, :scanner
|
|
44
|
+
|
|
45
|
+
# Creates a new Document from the input string. Options:
|
|
46
|
+
#
|
|
47
|
+
# +lightweight+:: In lightweight mode the string, content, and attributes of elements are recalculated on
|
|
48
|
+
# every access. This results in slower access, but a much smaller memory footprint. default => true
|
|
49
|
+
# Builds the document around +string+.
#
# The given options are merged over the defaults (:lightweight => true,
# :remove_whitespace => false). When :remove_whitespace is passed, every
# newline together with its surrounding whitespace is stripped from the
# stored string. The first node is then located with scan_node (defined
# outside this method) and wrapped in the root Element.
def initialize(string, options={})
  defaults = {:lightweight => true, :remove_whitespace => false}
  @options = defaults.merge(options)

  @string =
    if options[:remove_whitespace]
      string.gsub(/\s*\r?\n\s*/, '')
    else
      string
    end

  @scanner = StringScanner.new(@string)
  node_string, node_range = scan_node(@scanner)

  # No range means the content spans the whole string; a range ending at 0
  # is taken to mean "runs to the end of the document".
  @content_range =
    if node_range.nil?
      0...@string.length
    else
      finish = node_range.end == 0 ? @string.length : node_range.end
      node_range.begin...finish
    end

  @root = Element.new(self, @content_range, node_string)
end
|
|
61
|
+
|
|
62
|
+
# Returns true if the document is set to lightweight mode.
|
|
63
|
+
# Reports the :lightweight entry of the option hash built in initialize.
def lightweight?
  @options[:lightweight]
end
|
|
66
|
+
|
|
67
|
+
# The full range of the document (ie 0...length)
|
|
68
|
+
# Exclusive range covering every character of the document string,
# i.e. 0...string.length.
def string_range
  Range.new(0, string.length, true)
end
|
|
71
|
+
|
|
72
|
+
# The range from the beginning of the first element tag to the end of the corresponding end tag.
|
|
73
|
+
# Returns the content range computed in initialize. There is no fallback:
# the value is whatever @content_range currently holds.
def content_range
  @content_range
end
|
|
76
|
+
|
|
77
|
+
# Select elements using XPath statements. Not all statements are supported. See the introduction
|
|
78
|
+
# or tests for allowed statements.
|
|
79
|
+
# Resolves +xpath+ against the document.
#
# nil yields an empty array and '/' yields the document itself (wrapped in
# an array). Anything else is split into path steps, each step keeping its
# leading slashes, and handed to select_by_paths.
def select(xpath)
  if xpath.nil?
    []
  elsif xpath == '/'
    [self]
  else
    select_by_paths(xpath.scan(/\/*[^\/]+/))
  end
end
|
|
86
|
+
|
|
87
|
+
# Returns an array of all node names present in the document.
|
|
88
|
+
# Collects the distinct tag names that follow a '<' in the document string.
# The list is memoized in @nodes only when the document is not lightweight.
def node_names
  if @nodes
    @nodes
  else
    names = string.scan(/<(\w+)/m).flatten.uniq
    @nodes = names unless lightweight?
    names
  end
end
|
|
95
|
+
|
|
96
|
+
# Returns a table of element contents as configured. Options:
|
|
97
|
+
#
|
|
98
|
+
# +target+:: Specify the output target of the tableize operation. By default a string, but any object
|
|
99
|
+
# responding to '<<' can be provided. The target is returned by +tableize+
|
|
100
|
+
# +row+:: The xpath expression used to select rows of the table. default => '*'
|
|
101
|
+
# +col+:: The xpath expression used to select columns relative to the row elements. default => '*'
|
|
102
|
+
# +header_row+:: These should currently select the header row and cols, but should be replaced in favor
|
|
103
|
+
# of a more intuitive interface
|
|
104
|
+
# +header_col+::
|
|
105
|
+
# +row_delimit+:: The row delimiter. default => '\n'
|
|
106
|
+
# +col_delimit+:: The column delimiter. default => '\t'
|
|
107
|
+
# +index+:: If true, the output rows will be prefixed by an index corresponding to the row.
|
|
108
|
+
# +col_width+:: Specifies the width of the columns. Content will be trimmed if it exceeds this width,
|
|
109
|
+
# and will be justified left if width > 0 and justified right if width < 0.
|
|
110
|
+
#
|
|
111
|
+
# Selected elements are passed to the block. The content for each table cell will be the return
|
|
112
|
+
# value of the block, or the element contents if no block is given.
|
|
113
|
+
# Writes a delimited table to options[:target] and returns the target.
#
# Two passes are made: first over the :header_row / :header_col xpaths,
# then over :row / :col. For each selected row the columns are selected
# relative to the row; the cells are the block's return value when a block
# is given (it receives the row and its column elements), otherwise the
# content of each column element. With :index the row position is
# prepended. With :col_width every cell is padded (ljust for a positive
# width, rjust for a negative one).
#
# NOTE(review): ljust/rjust only pad, so cells wider than :col_width are
# not shortened by this method.
def tableize(options={}, &block)
  defaults = {
    :target => "",
    :row_delimit => "\n",
    :col_delimit => "\t",
    :row => "*",
    :col => "*",
    :header_row => nil,
    :header_col => "*",
    :index => false,
    :col_width => nil
  }
  options = defaults.merge(options)

  target, col_delimit, row_delimit, index, col_width =
    options.values_at(:target, :col_delimit, :row_delimit, :index, :col_width)

  # header pass first, then the body rows
  [[:header_row, :header_col], [:row, :col]].each do |row_key, col_key|
    row_xpath = options[row_key]
    col_xpath = options[col_key]

    select(row_xpath).each_with_index do |row, i|
      cells = row.select(col_xpath)
      cells = if block_given?
        yield(row, cells)
      else
        cells.collect {|cell| cell.content}
      end

      cells.unshift(i) if index

      unless col_width.nil?
        cells = cells.collect do |cell|
          text = cell.to_s
          col_width < 0 ? text.rjust(-col_width) : text.ljust(col_width)
        end
      end

      target << cells.join(col_delimit)
      target << row_delimit
    end
  end

  target
end
|
|
158
|
+
|
|
159
|
+
protected
|
|
160
|
+
|
|
161
|
+
# The document's only child is its root element.
def children
  [root]
end
|
|
164
|
+
end
|
|
165
|
+
end
|
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
|
|
2
|
+
module SCXML
|
|
3
|
+
class Element
|
|
4
|
+
class << self
|
|
5
|
+
# Instantiates an element, but hands back the document's existing root
# when the new element covers exactly the document's content range and a
# root has already been set. This keeps a single root object per document.
def new(*args)
  element = super
  document = element.doc

  covers_root = (element.string_range == document.content_range)
  if covers_root && !document.root.nil?
    document.root
  else
    element
  end
end
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
include XPath
|
|
12
|
+
|
|
13
|
+
attr_reader :doc, :name, :parent
|
|
14
|
+
|
|
15
|
+
# Sets up the element from one of three sources:
#
# String::   a new Document is created from it; the element spans the
#            document's content range.
# Document:: the element spans that document's content range.
# Element::  +arg+ becomes the parent; the element's range is derived by
#            join_range from the parent's string range and +range+.
#
# Any other argument raises ArgumentError. +string+ is kept only when the
# document is not lightweight. Finally the element's own string is parsed
# to obtain its name and content range.
def initialize(arg, range=nil, string=nil)
  if String === arg
    @doc = Document.new(arg)
    @string_range = @doc.content_range
  elsif Document === arg
    @doc = arg
    @string_range = arg.content_range
  elsif Element === arg
    @parent = arg
    @doc = arg.doc
    @string_range = join_range(arg.string_range, range)
  else
    raise ArgumentError, "Cannot initialize using: #{arg.class}"
  end

  @string = string unless doc.lightweight?

  # self.string is required: the +string+ parameter shadows the reader.
  parsed_name, parsed_range = parse(self.string)
  @name = parsed_name
  @content_range = parsed_range
end
|
|
34
|
+
|
|
35
|
+
# The element's full text, sliced out of the document string by
# string_range. Lightweight documents re-slice on every call; otherwise
# the slice is memoized in @string.
def string
  return doc.string[string_range] if doc.lightweight?

  @string ||= doc.string[string_range]
end
|
|
42
|
+
|
|
43
|
+
# Range of this element within the document string. Falls back to (and
# remembers) the document's content range when none has been assigned.
def string_range
  @string_range = doc.content_range unless @string_range
  @string_range
end
|
|
46
|
+
|
|
47
|
+
# Returns the element's content string (obtained from the inherited
# +content+ and memoized unless the document is lightweight).
#
# With +objectify+ true the stripped content is converted when the WHOLE
# value looks like a literal:
#
#   digits only             -> Integer
#   digits with optional .  -> Float
#   true / false (any case) -> true / false
#
# Anything else returns the unmodified content string. The patterns are
# anchored with \A and \z rather than ^ and $, so multi-line content such
# as "12\nabc" is no longer mistaken for the number 12.
def content(objectify=false)
  content_str = doc.lightweight? ? super() : (@content_str ||= super())
  return content_str unless objectify

  stripped = content_str.strip

  case stripped
  when /\A\d+\z/ then stripped.to_i
  when /\A\d*\.?\d+\z/ then stripped.to_f
  when /\Atrue\z/i then true
  when /\Afalse\z/i then false
  else
    content_str
  end
end
|
|
62
|
+
|
|
63
|
+
# Returns the value of the attribute named +key+, or nil when the opening
# tag has no such attribute.
#
# When the attribute hash has already been cached it is consulted
# directly. Otherwise the document's shared scanner is positioned just
# after the tag name and moved forward to "key=". The scanner position is
# changed as a side effect.
#
# The key is regexp-escaped and must start at a word boundary, so asking
# for 'id' no longer returns the value of an attribute such as 'grid', and
# keys containing regexp metacharacters are matched literally. If "key="
# is found but is not followed by a quoted value (for example because it
# occurred inside another attribute's value) the search continues with the
# next occurrence inside the tag.
def attribute(key)
  return @attributes[key] unless @attributes.nil?

  attr_range_begin = string_range.begin + name.length + 1
  # index of the '>' that closes the opening tag
  attr_range_end = string_range.begin + content_range.begin - 1

  scanner = doc.scanner
  scanner.pos = attr_range_begin
  key_pattern = /\b#{Regexp.escape(key.to_s)}=/

  while scanner.skip_until(key_pattern)
    # a match beyond the opening tag belongs to some other element
    return nil if scanner.pos > attr_range_end

    quoted = scanner.scan(/'[^']*'|"[^"]*"/)
    return quoted[1...-1] if quoted
  end

  nil
end
|
|
83
|
+
|
|
84
|
+
# Parses every key='value' / key="value" pair of the opening tag into a
# hash (quotes removed). The hash is cached in @attributes only when the
# document is not lightweight.
def attributes
  return @attributes unless @attributes.nil?

  # Slice the attribute section straight out of the document string; in
  # lightweight mode going through element.string would cost an extra
  # string selection.
  offset = string_range.begin
  first = offset + name.length + 1
  last = offset + content_range.begin - 1
  pairs = doc.string[first...last].scan(/(\w+)=('[^']*'|\"[^\"]*\")/m)

  parsed = {}
  pairs.each {|key, quoted| parsed[key] = quoted[1...-1] }

  @attributes = parsed unless doc.lightweight?
  parsed
end
|
|
101
|
+
|
|
102
|
+
# Child nodes of this element, built once by repeatedly calling
# content_node on a scanner over the element's content until it returns
# nothing. The resulting array is memoized in @children.
def children
  return @children if @children

  @children = []
  scanner = StringScanner.new(content)
  while (node = content_node(scanner))
    @children.push(node)
  end

  @children
end
|
|
114
|
+
|
|
115
|
+
# Resolves +xpath+ relative to this element. nil yields an empty array;
# an expression starting with '/' is delegated to the document; any other
# expression is split into path steps and passed to select_by_paths.
def select(xpath)
  return [] if xpath.nil?
  return doc.select(xpath) if xpath =~ /^\//

  select_by_paths(xpath.scan(/\/*[^\/]+/))
end
|
|
125
|
+
|
|
126
|
+
# Builds a hash from the elements selected by +xpath+. Each key is the
# block's return value for the element when a block is given, otherwise
# the element's name; each value is the element's content. Later elements
# overwrite earlier ones that produce the same key.
def hashify(xpath='*', &block)
  select(xpath).inject({}) do |result, element|
    key = if block_given?
      yield(element)
    else
      element.name
    end
    result[key] = element.content
    result
  end
end
|
|
134
|
+
|
|
135
|
+
# String form of the element: the same text that +string+ returns.
def to_s
  string
end
|
|
138
|
+
|
|
139
|
+
# Renders the element as a new XML string without modifying the document.
#
# Options:
# +indent+::           number of spaces placed before the opening tag. default => 0
# +attributes+::       hash used instead of the element's own attributes
# +merge_attributes+:: hash merged over the attributes chosen above
# +content+::          string used instead of the element's own content
# +add_content+::      string appended to the content chosen above
#
# Merging is done on a copy: neither the caller's +attributes+ hash nor
# the element's cached attribute hash is modified, so a rewrite never
# changes what +attributes+ reports afterwards.
#
# Empty content renders as <name/> when the element's own string ends in
# '/>', otherwise as <name></name>. Non-empty content is passed through
# reindent; when that changes it, a newline plus indentation is appended
# so the closing tag sits on its own line.
def rewrite(options={})
  n = options[:indent] || 0

  attributes = options[:attributes] || self.attributes
  attributes = attributes.merge(options[:merge_attributes]) if options[:merge_attributes]

  content = options[:content] || self.content
  content += options[:add_content] if options[:add_content]

  attr_string = attributes_to_s(attributes)
  attr_string = " #{attr_string}" unless attr_string.strip.empty?

  if content.empty?
    closed = (string[-2..-1] == '/>')
    closed ?
      "#{indentation(n)}<#{name}#{attr_string}/>" :
      "#{indentation(n)}<#{name}#{attr_string}></#{name}>"
  else
    content_str = reindent(content, n)
    content_str = "#{content_str}#{indentation(n, true)}" unless content_str.strip.empty? || content_str == content

    "#{indentation(n)}<#{name}#{attr_string}>#{content_str}</#{name}>"
  end
end
|
|
163
|
+
|
|
164
|
+
protected
|
|
165
|
+
|
|
166
|
+
# Formats an attribute hash as space-separated key='value' pairs.
#
# Values are normally wrapped in single quotes. A value that itself
# contains a single quote would terminate the attribute early, so it is
# wrapped in double quotes instead; if it contains both quote characters
# the single quotes are written as &apos;.
def attributes_to_s(attributes)
  pairs = attributes.map do |key, value|
    text = value.to_s
    if !text.include?("'")
      "#{key}='#{text}'"
    elsif !text.include?('"')
      "#{key}=\"#{text}\""
    else
      "#{key}='#{text.gsub("'", '&apos;')}'"
    end
  end
  pairs.join(' ')
end
|
|
173
|
+
|
|
174
|
+
# Returns +n+ spaces, preceded by a newline when +newline+ is true.
def indentation(n, newline=false)
  prefix = newline ? "\n" : ''
  prefix + (' ' * n)
end
|
|
177
|
+
|
|
178
|
+
# Re-indents the tags found in +string+, starting from indent level +n+
# and stepping by two spaces.
#
# The stripped input is consumed one chunk at a time, each chunk running
# up to and including the next '>'. An opening or self-closing tag raises
# the level before it is written; any chunk containing a '/' lowers the
# level after it is written. Every written chunk starts on a new line.
# When the input holds no '>' at all it is returned unchanged.
def reindent(string, n)
  scanner = StringScanner.new(string.strip)
  pieces = []
  depth = n

  while (chunk = scanner.scan_until(/>/))
    self_closing = (chunk[-2..-1] == '/>')
    has_slash = self_closing || !chunk.index('/').nil?

    depth += 2 if self_closing || !has_slash
    pieces << "#{indentation(depth, true)}#{chunk.strip}"
    depth -= 2 if has_slash

    # If a closing tag follows before the next newline or tag start, e.g.
    # the "some content</b>" part of '<b>some content</b>', keep that text
    # and its closing tag on the current line instead of re-indenting it.
    if scanner.check(/[^\n<]*<\//)
      pieces << scanner.scan_until(/>/)
      depth -= 2
    end
  end

  # no tags at all: hand back the input as given
  pieces << string if pieces.empty?

  pieces.join('')
end
|
|
203
|
+
|
|
204
|
+
# Extracts the tag name and the content range from an element string.
#
# Returns [name, content_range]. The range begins right after the opening
# tag and ends (as a negative, end-relative index) right before the
# matching end tag, so that str[content_range] is the element's content.
#
# The matching end tag is found by counting: every further opening tag of
# the same name increases the depth, every </name> decreases it, and
# self-closing tags of the same name are ignored. An element that contains
# children with its own name (<a><a>1</a></a>) therefore gets its full
# content instead of stopping at the first </a>.
#
# When no end tag is found (for example a self-closing element) the
# scanner stays where it is and the range is computed from that position.
def parse(str)
  scanner = StringScanner.new(str)

  scanner.scan_until(/<(\w+)[^>]*>/m)
  start = scanner.pos
  name = scanner[1]
  end_tag = "</#{name}>"

  if name
    escaped = Regexp.escape(name)
    # either the end tag, or another opening/self-closing tag of this name
    # (the lookahead keeps <ab> from being taken for <a>)
    token = /<\/#{escaped}>|<#{escaped}(?=[\s\/>])[^>]*>/m
    depth = 1
    while depth > 0 && scanner.skip_until(token)
      tag = scanner.matched
      if tag.start_with?('</')
        depth -= 1
      elsif !tag.end_with?('/>')
        depth += 1
      end
    end
  else
    # no opening tag was found; search for the literal end tag as before
    scanner.skip_until(Regexp.new(end_tag, Regexp::MULTILINE))
  end

  # rest_size replaces the obsolete StringScanner#restsize
  finish = -scanner.rest_size - end_tag.length

  [name, start...finish]
end
|
|
218
|
+
end
|
|
219
|
+
end
|