chupa-text 1.1.3 → 1.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/doc/text/news.md +12 -0
- data/lib/chupa-text/command/chupa-text.rb +7 -1
- data/lib/chupa-text/decomposer.rb +8 -0
- data/lib/chupa-text/decomposers/office-open-xml-document.rb +51 -0
- data/lib/chupa-text/decomposers/office-open-xml-presentation.rb +67 -0
- data/lib/chupa-text/decomposers/office-open-xml-workbook.rb +114 -0
- data/lib/chupa-text/decomposers/office-open-xml.rb +196 -0
- data/lib/chupa-text/decomposers/opendocument-presentation.rb +105 -0
- data/lib/chupa-text/decomposers/opendocument-spreadsheet.rb +134 -0
- data/lib/chupa-text/decomposers/opendocument-text.rb +89 -0
- data/lib/chupa-text/decomposers/opendocument.rb +139 -0
- data/lib/chupa-text/extractor.rb +8 -2
- data/lib/chupa-text/formatters/mime.rb +3 -2
- data/lib/chupa-text/version.rb +1 -1
- data/test/decomposers/test-office-open-xml-document.rb +144 -0
- data/test/decomposers/test-office-open-xml-presentation.rb +133 -0
- data/test/decomposers/test-office-open-xml-workbook.rb +138 -0
- data/test/decomposers/test-open-document-presentation.rb +136 -0
- data/test/decomposers/test-open-document-spreadsheet.rb +152 -0
- data/test/decomposers/test-open-document-text.rb +144 -0
- data/test/fixture/docx/attributes.docx +0 -0
- data/test/fixture/docx/multi-pages.docx +0 -0
- data/test/fixture/docx/one-page.docx +0 -0
- data/test/fixture/docx/special-characters.docx +0 -0
- data/test/fixture/odp/attributes.odp +0 -0
- data/test/fixture/odp/multi-slides.odp +0 -0
- data/test/fixture/odp/one-slide.odp +0 -0
- data/test/fixture/ods/attributes.ods +0 -0
- data/test/fixture/ods/multi-sheets.ods +0 -0
- data/test/fixture/ods/one-sheet.ods +0 -0
- data/test/fixture/odt/attributes.odt +0 -0
- data/test/fixture/odt/multi-pages.odt +0 -0
- data/test/fixture/odt/one-page.odt +0 -0
- data/test/fixture/odt/special-characters.odt +0 -0
- data/test/fixture/pptx/attributes.pptx +0 -0
- data/test/fixture/pptx/multi-slides.pptx +0 -0
- data/test/fixture/pptx/one-slide.pptx +0 -0
- data/test/fixture/xlsx/attributes.xlsx +0 -0
- data/test/fixture/xlsx/multi-sheets.xlsx +0 -0
- data/test/fixture/xlsx/one-sheet.xlsx +0 -0
- metadata +36 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ce1def40525d7278aa45cdbe3af69cd95656fa58d1d02e6ddb3e2677940ed7d6
|
4
|
+
data.tar.gz: 4ec184c4bd0f61508d4b1908e7c89abd8aeb01026c8b3590b404ca672887c6a1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ae43e4354761a953f61cda5348524f44346996133e4f77a310bfbfd07295b4548d41847f8132387ec9ade30c44a9c2bdd7936c7633085534ce03bb5db6f9061f
|
7
|
+
data.tar.gz: f9174e01e21a2dbbc1647d191084969e9fd0cf4dc6f24a469ac7c6c4f2378d7a11946e6b2350e8df86ad5d007c6f048c804d4c1c2b79e95628511383c76c6061
|
data/doc/text/news.md
CHANGED
@@ -1,5 +1,17 @@
|
|
1
1
|
# News
|
2
2
|
|
3
|
+
## 1.1.4: 2019-02-26
|
4
|
+
|
5
|
+
### Improvements
|
6
|
+
|
7
|
+
* Added support for decomposer selection by score.
|
8
|
+
|
9
|
+
* Added support for Office Open XML.
|
10
|
+
|
11
|
+
* Added support for OpenDocument.
|
12
|
+
|
13
|
+
* `chupa-text`: Added `--mime-boundary` option.
|
14
|
+
|
3
15
|
## 1.1.3: 2018-07-18
|
4
16
|
|
5
17
|
### Improvements
|
@@ -46,6 +46,7 @@ module ChupaText
|
|
46
46
|
@uri = nil
|
47
47
|
@mime_type = nil
|
48
48
|
@format = :json
|
49
|
+
@mime_formatter_options = {}
|
49
50
|
@need_screenshot = true
|
50
51
|
@expected_screenshot_size = [200, 200]
|
51
52
|
end
|
@@ -127,6 +128,11 @@ module ChupaText
|
|
127
128
|
"(default: #{@format})") do |format|
|
128
129
|
@format = format
|
129
130
|
end
|
131
|
+
parser.on("--mime-boundary=BOUNDARY",
|
132
|
+
"Use BOUNDARY for MIME boundary.",
|
133
|
+
"(default: Use SHA1 digest of URI)") do |boundary|
|
134
|
+
@mime_formatter_options[:boundary] = boundary
|
135
|
+
end
|
130
136
|
parser.on("--[no-]need-screenshot",
|
131
137
|
"Generate screenshot if available.",
|
132
138
|
"(default: #{@need_screenshot})") do |boolean|
|
@@ -220,7 +226,7 @@ module ChupaText
|
|
220
226
|
when :text
|
221
227
|
Formatters::Text.new($stdout)
|
222
228
|
when :mime
|
223
|
-
Formatters::MIME.new($stdout)
|
229
|
+
Formatters::MIME.new($stdout, @mime_formatter_options)
|
224
230
|
end
|
225
231
|
end
|
226
232
|
end
|
@@ -30,6 +30,14 @@ module ChupaText
|
|
30
30
|
raise NotImplementedError, "must implement #{self.class}\##{__method__}"
|
31
31
|
end
|
32
32
|
|
33
|
+
# Scores how well this decomposer fits `data`.
#
# Returns 0 when `target?` accepts `data`, and nil when it does not.
def target_score(data)
  target?(data) ? 0 : nil
end
|
40
|
+
|
33
41
|
# Placeholder implementation that always raises NotImplementedError.
#
# The message names the receiver's class and this method, telling the
# reader which method still has to be implemented.
def decompose(data)
  message = "must implement #{self.class}\##{__method__}"
  raise NotImplementedError, message
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
# Copyright (C) 2019 Kouhei Sutou <kou@clear-code.com>
|
2
|
+
#
|
3
|
+
# This library is free software; you can redistribute it and/or
|
4
|
+
# modify it under the terms of the GNU Lesser General Public
|
5
|
+
# License as published by the Free Software Foundation; either
|
6
|
+
# version 2.1 of the License, or (at your option) any later version.
|
7
|
+
#
|
8
|
+
# This library is distributed in the hope that it will be useful,
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
11
|
+
# Lesser General Public License for more details.
|
12
|
+
#
|
13
|
+
# You should have received a copy of the GNU Lesser General Public
|
14
|
+
# License along with this library; if not, write to the Free Software
|
15
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
16
|
+
|
17
|
+
require "chupa-text/decomposers/office-open-xml"
|
18
|
+
|
19
|
+
module ChupaText
|
20
|
+
module Decomposers
|
21
|
+
class OfficeOpenXMLDocument < OfficeOpenXML
|
22
|
+
registry.register("office-open-xml-document", self)
|
23
|
+
|
24
|
+
# Stores the file extensions, MIME types and XML namespace URI that
# identify word-processing documents and templates (plain and
# macro-enabled) for this decomposer.
def initialize(options={})
  super
  @namespace_uri =
    "http://schemas.openxmlformats.org/wordprocessingml/2006/main"
  @extensions = %w[docx docm dotx dotm]
  @mime_types = %w[
    application/vnd.openxmlformats-officedocument.wordprocessingml.document
    application/vnd.ms-word.document.macroEnabled.12
    application/vnd.openxmlformats-officedocument.wordprocessingml.template
    application/vnd.ms-word.template.macroEnabled.12
  ]
end
|
41
|
+
|
42
|
+
private
|
43
|
+
# Handles one archive entry of a word-processing document.
#
# Only "word/document.xml" is of interest: its text is extracted into
# context[:text]. Any other entry is ignored and nil is returned.
def process_entry(entry, context)
  return unless entry.zip_path == "word/document.xml"

  extract_text(entry, context[:text])
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
# Copyright (C) 2019 Kouhei Sutou <kou@clear-code.com>
|
2
|
+
#
|
3
|
+
# This library is free software; you can redistribute it and/or
|
4
|
+
# modify it under the terms of the GNU Lesser General Public
|
5
|
+
# License as published by the Free Software Foundation; either
|
6
|
+
# version 2.1 of the License, or (at your option) any later version.
|
7
|
+
#
|
8
|
+
# This library is distributed in the hope that it will be useful,
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
11
|
+
# Lesser General Public License for more details.
|
12
|
+
#
|
13
|
+
# You should have received a copy of the GNU Lesser General Public
|
14
|
+
# License along with this library; if not, write to the Free Software
|
15
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
16
|
+
|
17
|
+
require "chupa-text/decomposers/office-open-xml"
|
18
|
+
|
19
|
+
module ChupaText
|
20
|
+
module Decomposers
|
21
|
+
class OfficeOpenXMLPresentation < OfficeOpenXML
|
22
|
+
registry.register("office-open-xml-presentation", self)
|
23
|
+
|
24
|
+
# Stores the file extensions, MIME types and XML namespace URI that
# identify presentations, slide shows, templates and single slides
# (plain and macro-enabled) for this decomposer.
def initialize(options={})
  super
  @namespace_uri =
    "http://schemas.openxmlformats.org/drawingml/2006/main"
  @extensions = %w[pptx pptm ppsx ppsm potx potm sldx sldm]
  @mime_types = %w[
    application/vnd.openxmlformats-officedocument.presentationml.presentation
    application/vnd.ms-powerpoint.presentation.macroEnabled.12
    application/vnd.openxmlformats-officedocument.presentationml.slideshow
    application/vnd.ms-powerpoint.slideshow.macroEnabled.12
    application/vnd.openxmlformats-officedocument.presentationml.template
    application/vnd.ms-powerpoint.template.macroEnabled.12
    application/vnd.openxmlformats-officedocument.presentationml.slide
    application/vnd.ms-powerpoint.slide.macroEnabled.12
  ]
end
|
49
|
+
|
50
|
+
private
|
51
|
+
# Collects the text of one slide part.
#
# An entry whose path is exactly "ppt/slides/slide<N>.xml" has its text
# extracted and appended to context[:slides] as [N, text]. Every other
# entry is ignored and nil is returned.
def process_entry(entry, context)
  # The pattern is anchored at both ends so that look-alike paths such as
  # "ppt/slides/slide1.xml.bak" are not mistaken for slides.
  slide_path = /\Appt\/slides\/slide(\d+)\.xml\z/.match(entry.zip_path)
  return if slide_path.nil?

  nth_slide = Integer(slide_path[1], 10)
  slide_text = ""
  extract_text(entry, slide_text)
  context[:slides] ||= []
  context[:slides] << [nth_slide, slide_text]
end
|
61
|
+
|
62
|
+
# Builds the presentation text from the collected slides.
#
# context[:slides] holds [slide_number, text] pairs in the order the
# archive entries were visited; they are sorted by slide number and their
# texts joined with "\n". Returns "" when no slide was collected, since
# context[:slides] is only created once the first slide is seen.
def accumulate_text(context)
  slides = context[:slides] || []
  slides.sort_by(&:first).collect(&:last).join("\n")
end
|
65
|
+
end
|
66
|
+
end
|
67
|
+
end
|
@@ -0,0 +1,114 @@
|
|
1
|
+
# Copyright (C) 2019 Kouhei Sutou <kou@clear-code.com>
|
2
|
+
#
|
3
|
+
# This library is free software; you can redistribute it and/or
|
4
|
+
# modify it under the terms of the GNU Lesser General Public
|
5
|
+
# License as published by the Free Software Foundation; either
|
6
|
+
# version 2.1 of the License, or (at your option) any later version.
|
7
|
+
#
|
8
|
+
# This library is distributed in the hope that it will be useful,
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
11
|
+
# Lesser General Public License for more details.
|
12
|
+
#
|
13
|
+
# You should have received a copy of the GNU Lesser General Public
|
14
|
+
# License along with this library; if not, write to the Free Software
|
15
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
16
|
+
|
17
|
+
require "chupa-text/decomposers/office-open-xml"
|
18
|
+
|
19
|
+
module ChupaText
|
20
|
+
module Decomposers
|
21
|
+
class OfficeOpenXMLWorkbook < OfficeOpenXML
|
22
|
+
registry.register("office-open-xml-workbook", self)
|
23
|
+
|
24
|
+
# Stores the file extensions, MIME types and XML namespace URI that
# identify workbooks and workbook templates (plain and macro-enabled)
# for this decomposer.
def initialize(options={})
  super
  @namespace_uri =
    "http://schemas.openxmlformats.org/spreadsheetml/2006/main"
  @extensions = %w[xlsx xlsm xltx xltm]
  @mime_types = %w[
    application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
    application/vnd.ms-excel.sheet.macroEnabled.12
    application/vnd.openxmlformats-officedocument.spreadsheetml.template
    application/vnd.ms-excel.template.macroEnabled.12
  ]
end
|
41
|
+
|
42
|
+
private
|
43
|
+
# Routes one archive entry of a workbook.
#
# * "xl/sharedStrings.xml": its text is extracted into a fresh array
#   stored at context[:shared_strings].
# * "xl/worksheets/sheet<N>.xml": the sheet is parsed with SheetListener
#   and appended to context[:sheets] as [N, rows].
#
# Any other entry is ignored and nil is returned.
#
# NOTE(review): accumulate_text looks shared strings up by position, so
# this relies on extract_text appending exactly one element per shared
# string -- confirm that holds for rich-text and phonetic entries.
def process_entry(entry, context)
  path = entry.zip_path
  if path == "xl/sharedStrings.xml"
    strings = []
    context[:shared_strings] = strings
    extract_text(entry, strings)
  elsif (sheet_path = /\Axl\/worksheets\/sheet(\d+)\.xml\z/.match(path))
    nth_sheet = Integer(sheet_path[1], 10)
    rows = []
    parse(entry.file_data, SheetListener.new(rows))
    context[:sheets] ||= []
    context[:sheets] << [nth_sheet, rows]
  end
end
|
57
|
+
|
58
|
+
# Builds the workbook text from the collected sheets.
#
# context[:sheets] holds [sheet_number, rows] pairs; sheets are ordered by
# number. Each row becomes one line whose cells are joined with "\t", and
# the sheet texts are joined with "\n".
#
# A cell is either an Integer, which is an index into
# context[:shared_strings], or a String holding the literal cell value as
# recorded by SheetListener. A missing context[:sheets] or
# context[:shared_strings] is treated as empty, so a workbook without
# string cells or without worksheets no longer raises NoMethodError.
def accumulate_text(context)
  shared_strings = context[:shared_strings] || []
  sheets = (context[:sheets] || []).sort_by(&:first).collect(&:last)
  sheet_texts = sheets.collect do |sheet|
    sheet_text = ""
    sheet.each do |row|
      row_texts = row.collect do |cell|
        cell.is_a?(Integer) ? shared_strings[cell] : cell
      end
      sheet_text << row_texts.join("\t") << "\n"
    end
    sheet_text
  end
  sheet_texts.join("\n")
end

# SAX2 listener that records the cell values of one worksheet.
#
# Every <row> element appends a new array to the sheet passed to the
# constructor, and every <v> element appends one cell to the latest row.
class SheetListener
  include REXML::SAX2Listener

  URI = "http://schemas.openxmlformats.org/spreadsheetml/2006/main"

  # Value of the cell type attribute that marks <v> as a shared string index.
  SHARED_STRING_TYPE = "s"

  # sheet: the array that receives one array of cells per <row>.
  def initialize(sheet)
    @sheet = sheet
    @cell_type = nil
    @in_v = false
  end

  def start_element(uri, local_name, qname, attributes)
    return unless uri == URI
    case local_name
    when "row"
      @sheet << []
    when "c"
      # Remember the cell type: it decides how the nested <v> is read.
      @cell_type = attributes["t"]
    when "v"
      @in_v = true
    end
  end

  def end_element(uri, local_name, qname)
    @in_v = false
    @cell_type = nil if uri == URI and local_name == "c"
  end

  def characters(text)
    add_column(text)
  end

  def cdata(content)
    add_column(content)
  end

  private
  # Appends the current <v> content to the latest row.
  #
  # Only cells typed as shared strings carry an index, which is stored as
  # an Integer. Every other cell (numbers, booleans, formula results)
  # carries its value directly, so it is stored as the literal text
  # instead of being forced through Integer(), which raised on values
  # such as "1.5" and mapped integer values to unrelated shared strings.
  def add_column(text)
    return unless @in_v
    if @cell_type == SHARED_STRING_TYPE
      @sheet.last << Integer(text, 10)
    else
      @sheet.last << text
    end
  end
end
|
112
|
+
end
|
113
|
+
end
|
114
|
+
end
|
@@ -0,0 +1,196 @@
|
|
1
|
+
# Copyright (C) 2019 Kouhei Sutou <kou@clear-code.com>
|
2
|
+
#
|
3
|
+
# This library is free software; you can redistribute it and/or
|
4
|
+
# modify it under the terms of the GNU Lesser General Public
|
5
|
+
# License as published by the Free Software Foundation; either
|
6
|
+
# version 2.1 of the License, or (at your option) any later version.
|
7
|
+
#
|
8
|
+
# This library is distributed in the hope that it will be useful,
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
11
|
+
# Lesser General Public License for more details.
|
12
|
+
#
|
13
|
+
# You should have received a copy of the GNU Lesser General Public
|
14
|
+
# License along with this library; if not, write to the Free Software
|
15
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
16
|
+
|
17
|
+
require "cgi/util"
|
18
|
+
require "rexml/parsers/sax2parser"
|
19
|
+
require "rexml/sax2listener"
|
20
|
+
|
21
|
+
require "archive/zip"
|
22
|
+
|
23
|
+
module ChupaText
|
24
|
+
module Decomposers
|
25
|
+
class OfficeOpenXML < Decomposer
|
26
|
+
# Tells whether `data` should be handled by this decomposer.
#
# True when the data's extension is listed in @extensions or, failing
# that, when its MIME type is listed in @mime_types.
def target?(data)
  return true if @extensions.include?(data.extension)

  @mime_types.include?(data.mime_type)
end
|
30
|
+
|
31
|
+
# Scores how well this decomposer fits `data`.
#
# Returns -1 when `target?` accepts `data`, and nil when it does not.
# NOTE(review): the negative score presumably ranks this decomposer below
# others that accept the same data -- confirm against the score-based
# selection code.
def target_score(data)
  target?(data) ? -1 : nil
end
|
38
|
+
|
39
|
+
# Extracts text and attributes from `data` and yields them as one TextData.
#
# The data is opened as a zip archive and each file entry is visited:
# "docProps/app.xml" and "docProps/core.xml" feed the attributes through
# AttributesListener, every other entry is handed to process_entry. The
# text is then produced by accumulate_text, and the collected attributes
# are copied onto the TextData before it is yielded.
def decompose(data)
  context = {text: "", attributes: {}}
  data.open do |input|
    Archive::Zip.open(input) do |zip|
      zip.each do |entry|
        next unless entry.file?
        case entry.zip_path
        when "docProps/app.xml", "docProps/core.xml"
          parse(entry.file_data, AttributesListener.new(context[:attributes]))
        else
          process_entry(entry, context)
        end
      end
    end
  end
  text_data = TextData.new(accumulate_text(context), source_data: data)
  context[:attributes].each_pair do |name, value|
    text_data[name] = value
  end
  yield(text_data)
end
|
68
|
+
|
69
|
+
private
|
70
|
+
# Reads all of `io` and runs it through a REXML SAX2 parser, delivering
# the parse events to `listener`.
def parse(io, listener)
  sax_parser = REXML::Parsers::SAX2Parser.new(REXML::Source.new(io.read))
  sax_parser.listen(listener)
  sax_parser.parse
end
|
76
|
+
|
77
|
+
# Parses the entry's file data with a TextListener bound to
# @namespace_uri, so that the listener appends the text it finds to
# `texts`.
def extract_text(entry, texts)
  parse(entry.file_data, TextListener.new(texts, @namespace_uri))
end
|
81
|
+
|
82
|
+
# Default text accumulation: returns whatever is stored at
# context[:text], unchanged.
def accumulate_text(context)
  text = context[:text]
  text
end
|
85
|
+
|
86
|
+
# SAX2 listener that appends the text of <t> elements to an output.
#
# Only elements in `target_uri` are considered. The character data seen
# after a <t> start tag is HTML-unescaped and appended to `output` with
# <<, and the end of a <p> or <br> element appends "\n".
class TextListener
  include REXML::SAX2Listener

  # output: any object that accepts << (callers pass a String or an Array).
  # target_uri: namespace URI of the elements to look at.
  def initialize(output, target_uri)
    @output = output
    @target_uri = target_uri
    @in_target = false
  end

  def start_element(uri, local_name, qname, attributes)
    @in_target = true if uri == @target_uri && local_name == "t"
  end

  def end_element(uri, local_name, qname)
    # Any end tag closes the current text run, whatever its namespace.
    @in_target = false
    return unless uri == @target_uri

    @output << "\n" if %w[p br].include?(local_name)
  end

  def characters(text)
    append_text(text)
  end

  def cdata(content)
    append_text(content)
  end

  private
  # Appends `text` only while inside a target <t> element.
  def append_text(text)
    @output << CGI.unescapeHTML(text) if @in_target
  end
end
|
127
|
+
|
128
|
+
# SAX2 listener that copies selected document properties into a hash.
#
# Recognized elements and the attribute names they are stored under:
# * core properties: keywords -> "keywords"
# * extended properties: Application -> "application"
# * Dublin Core: description, title, subject -> same name
# * Dublin Core terms: created, modified -> "created_time",
#   "modified_time", parsed with Time.xmlschema
class AttributesListener
  include REXML::SAX2Listener

  CORE_PROPERTIES_URI =
    "http://schemas.openxmlformats.org/package/2006/metadata/core-properties"
  EXTENDED_PROPERTIES_URI =
    "http://schemas.openxmlformats.org/officeDocument/2006/extended-properties"
  DUBLIN_CORE_URI = "http://purl.org/dc/elements/1.1/"
  DUBLIN_CORE_TERMS_URI = "http://purl.org/dc/terms/"

  # attributes: the hash that receives the extracted values.
  def initialize(attributes)
    @attributes = attributes
    @name = nil
    @type = nil
  end

  def start_element(uri, local_name, qname, attributes)
    if uri == CORE_PROPERTIES_URI
      @name = local_name if local_name == "keywords"
    elsif uri == EXTENDED_PROPERTIES_URI
      @name = local_name.downcase if local_name == "Application"
    elsif uri == DUBLIN_CORE_URI
      @name = local_name if %w[description title subject].include?(local_name)
    elsif uri == DUBLIN_CORE_TERMS_URI
      if %w[created modified].include?(local_name)
        @name = "#{local_name}_time"
        @type = :w3cdtf
      end
    end
  end

  def end_element(uri, local_name, qname)
    # Any end tag clears the pending attribute name and type.
    @name = nil
    @type = nil
  end

  def characters(text)
    set_attribute(text)
  end

  def cdata(content)
    set_attribute(content)
  end

  # Stores `value` under the pending attribute name, if there is one.
  # The value is HTML-unescaped first and, for :w3cdtf attributes,
  # converted with Time.xmlschema.
  def set_attribute(value)
    return if @name.nil?

    unescaped = CGI.unescapeHTML(value)
    @attributes[@name] =
      @type == :w3cdtf ? Time.xmlschema(unescaped) : unescaped
  end
end
|
194
|
+
end
|
195
|
+
end
|
196
|
+
end
|