polyrex-parser 0.2.8 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/polyrex-parser.rb +64 -13
- metadata +13 -11
data/lib/polyrex-parser.rb
CHANGED
@@ -13,48 +13,99 @@ class PolyrexParser
|
|
13
13
|
def to_a()
|
14
14
|
@a
|
15
15
|
end
|
16
|
+
|
17
|
+
def to_s()
|
18
|
+
name, value, attributes, *remaining = @a
|
19
|
+
[value.strip, scan_a(remaining)].flatten.join(' ')
|
20
|
+
end
|
16
21
|
|
17
22
|
private
|
18
23
|
|
19
|
-
def
|
24
|
+
def scan_a(a)
|
25
|
+
a.inject([]) do |r, x|
|
26
|
+
name, value, attributes, *remaining = x
|
27
|
+
text_remaining = scan_a remaining if remaining
|
20
28
|
|
29
|
+
value = '' if name == 'format_mask' or name == 'schema' or name == 'recordx_type'
|
30
|
+
r << value.strip << text_remaining if value
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
def parse(s)
|
21
35
|
s.instance_eval{
|
22
36
|
def fetch_summary()
|
23
|
-
|
24
|
-
|
25
|
-
|
37
|
+
name = 'summary'
|
38
|
+
self.slice(((self =~ /<#{name}>/) + name.length + 2) .. \
|
39
|
+
(self =~ /<\/#{name}>/m) - 1) if self[/<#{name}>/]
|
26
40
|
end
|
27
41
|
def fetch_records()
|
28
|
-
|
29
|
-
|
30
|
-
|
42
|
+
name = 'records'
|
43
|
+
self.slice(((self =~ /<#{name}/) + name.length + 2) .. \
|
44
|
+
(self.rindex(/<\/#{name}>/m)) - 1) if self[/<\/#{name}/]
|
31
45
|
end
|
32
46
|
}
|
33
47
|
|
34
48
|
root_name, raw_attributes = s.match(/<(\w+)(\s[^\/>]+)?/).captures
|
35
49
|
attributes = get_attributes(raw_attributes) if raw_attributes
|
50
|
+
raw_summary = s.fetch_summary
|
36
51
|
|
37
|
-
summary = RexleParser.new("<summary>#{
|
52
|
+
summary = RexleParser.new("<summary>#{raw_summary}</summary>").to_a
|
38
53
|
|
39
54
|
raw_records = s.fetch_records
|
40
55
|
records = nil
|
41
56
|
|
57
|
+
|
42
58
|
if raw_records then
|
59
|
+
|
43
60
|
node_name = raw_records[/<(\w+)/,1]
|
44
|
-
|
45
|
-
|
61
|
+
|
62
|
+
#record_threads = raw_records.strip.split(/(?=<#{node_name}[^>]*>)/).map do |x|
|
63
|
+
|
64
|
+
a = []
|
65
|
+
i = 0
|
66
|
+
|
67
|
+
while i < raw_records.strip.length do
|
68
|
+
i = scan_s(raw_records, node_name, i) + 1
|
69
|
+
a << i
|
70
|
+
end
|
71
|
+
|
72
|
+
|
73
|
+
record_threads = ([0] + a).each_cons(2).map do |s1, s2|
|
74
|
+
raw_s = raw_records[s1...s2]
|
75
|
+
|
76
|
+
Thread.new{ Thread.current[:record] = parse(raw_s) }
|
46
77
|
end
|
47
78
|
records = record_threads.map{|x| x.join; x[:record]}
|
79
|
+
|
48
80
|
end
|
49
81
|
|
50
82
|
[root_name, "", attributes ||= {}, [*summary], ['records', "",{}, *records]]
|
83
|
+
|
51
84
|
end
|
52
85
|
|
53
|
-
|
86
|
+
|
87
|
+
def scan_s(s, node_name, instances=0, i=0)
|
88
|
+
|
89
|
+
r = s[i..-1] =~ /<\/?#{node_name}/
|
90
|
+
l = node_name.length + 1
|
91
|
+
return s.length if r.nil?
|
92
|
+
|
93
|
+
if s.slice(i + r,l) == "<#{node_name}" then
|
94
|
+
scan_s(s, node_name, instances+1, i + r + l)
|
95
|
+
else
|
96
|
+
if instances > 1 then
|
97
|
+
scan_s(s, node_name, instances - 1, i + r + node_name.length + 3)
|
98
|
+
else
|
99
|
+
return i + r + node_name.length + 2
|
100
|
+
end
|
101
|
+
end
|
102
|
+
|
103
|
+
end
|
104
|
+
|
105
|
+
def get_attributes(raw_attributes)
|
54
106
|
raw_attributes.scan(/(\w+\='[^']+')|(\w+\="[^"]+")/).map(&:compact).flatten.inject({}) do |r, x|
|
55
107
|
attr_name, val = x.split(/=/)
|
56
108
|
r.merge(attr_name.to_sym => val[1..-2])
|
57
109
|
end
|
58
|
-
end
|
59
|
-
|
110
|
+
end
|
60
111
|
end
|
metadata
CHANGED
@@ -1,27 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: polyrex-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
|
4
|
+
prerelease:
|
5
|
+
version: 0.3.0
|
5
6
|
platform: ruby
|
6
|
-
authors:
|
7
|
-
|
7
|
+
authors:
|
8
|
+
- James Robertson
|
8
9
|
autorequire:
|
9
10
|
bindir: bin
|
10
11
|
cert_chain: []
|
11
12
|
|
12
|
-
date: 2011-
|
13
|
+
date: 2011-11-20 00:00:00 +00:00
|
13
14
|
default_executable:
|
14
15
|
dependencies:
|
15
16
|
- !ruby/object:Gem::Dependency
|
16
17
|
name: recordx-parser
|
17
|
-
|
18
|
-
|
19
|
-
|
18
|
+
prerelease: false
|
19
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
20
|
+
none: false
|
20
21
|
requirements:
|
21
22
|
- - ">="
|
22
23
|
- !ruby/object:Gem::Version
|
23
24
|
version: "0"
|
24
|
-
|
25
|
+
type: :runtime
|
26
|
+
version_requirements: *id001
|
25
27
|
description:
|
26
28
|
email:
|
27
29
|
executables: []
|
@@ -42,21 +44,21 @@ rdoc_options: []
|
|
42
44
|
require_paths:
|
43
45
|
- lib
|
44
46
|
required_ruby_version: !ruby/object:Gem::Requirement
|
47
|
+
none: false
|
45
48
|
requirements:
|
46
49
|
- - ">="
|
47
50
|
- !ruby/object:Gem::Version
|
48
51
|
version: "0"
|
49
|
-
version:
|
50
52
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
53
|
+
none: false
|
51
54
|
requirements:
|
52
55
|
- - ">="
|
53
56
|
- !ruby/object:Gem::Version
|
54
57
|
version: "0"
|
55
|
-
version:
|
56
58
|
requirements: []
|
57
59
|
|
58
60
|
rubyforge_project:
|
59
|
-
rubygems_version: 1.
|
61
|
+
rubygems_version: 1.5.2
|
60
62
|
signing_key:
|
61
63
|
specification_version: 3
|
62
64
|
summary: polyrex-parser
|