chupa-text 1.1.3 → 1.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/doc/text/news.md +12 -0
- data/lib/chupa-text/command/chupa-text.rb +7 -1
- data/lib/chupa-text/decomposer.rb +8 -0
- data/lib/chupa-text/decomposers/office-open-xml-document.rb +51 -0
- data/lib/chupa-text/decomposers/office-open-xml-presentation.rb +67 -0
- data/lib/chupa-text/decomposers/office-open-xml-workbook.rb +114 -0
- data/lib/chupa-text/decomposers/office-open-xml.rb +196 -0
- data/lib/chupa-text/decomposers/opendocument-presentation.rb +105 -0
- data/lib/chupa-text/decomposers/opendocument-spreadsheet.rb +134 -0
- data/lib/chupa-text/decomposers/opendocument-text.rb +89 -0
- data/lib/chupa-text/decomposers/opendocument.rb +139 -0
- data/lib/chupa-text/extractor.rb +8 -2
- data/lib/chupa-text/formatters/mime.rb +3 -2
- data/lib/chupa-text/version.rb +1 -1
- data/test/decomposers/test-office-open-xml-document.rb +144 -0
- data/test/decomposers/test-office-open-xml-presentation.rb +133 -0
- data/test/decomposers/test-office-open-xml-workbook.rb +138 -0
- data/test/decomposers/test-open-document-presentation.rb +136 -0
- data/test/decomposers/test-open-document-spreadsheet.rb +152 -0
- data/test/decomposers/test-open-document-text.rb +144 -0
- data/test/fixture/docx/attributes.docx +0 -0
- data/test/fixture/docx/multi-pages.docx +0 -0
- data/test/fixture/docx/one-page.docx +0 -0
- data/test/fixture/docx/special-characters.docx +0 -0
- data/test/fixture/odp/attributes.odp +0 -0
- data/test/fixture/odp/multi-slides.odp +0 -0
- data/test/fixture/odp/one-slide.odp +0 -0
- data/test/fixture/ods/attributes.ods +0 -0
- data/test/fixture/ods/multi-sheets.ods +0 -0
- data/test/fixture/ods/one-sheet.ods +0 -0
- data/test/fixture/odt/attributes.odt +0 -0
- data/test/fixture/odt/multi-pages.odt +0 -0
- data/test/fixture/odt/one-page.odt +0 -0
- data/test/fixture/odt/special-characters.odt +0 -0
- data/test/fixture/pptx/attributes.pptx +0 -0
- data/test/fixture/pptx/multi-slides.pptx +0 -0
- data/test/fixture/pptx/one-slide.pptx +0 -0
- data/test/fixture/xlsx/attributes.xlsx +0 -0
- data/test/fixture/xlsx/multi-sheets.xlsx +0 -0
- data/test/fixture/xlsx/one-sheet.xlsx +0 -0
- metadata +36 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ce1def40525d7278aa45cdbe3af69cd95656fa58d1d02e6ddb3e2677940ed7d6
|
4
|
+
data.tar.gz: 4ec184c4bd0f61508d4b1908e7c89abd8aeb01026c8b3590b404ca672887c6a1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ae43e4354761a953f61cda5348524f44346996133e4f77a310bfbfd07295b4548d41847f8132387ec9ade30c44a9c2bdd7936c7633085534ce03bb5db6f9061f
|
7
|
+
data.tar.gz: f9174e01e21a2dbbc1647d191084969e9fd0cf4dc6f24a469ac7c6c4f2378d7a11946e6b2350e8df86ad5d007c6f048c804d4c1c2b79e95628511383c76c6061
|
data/doc/text/news.md
CHANGED
@@ -1,5 +1,17 @@
|
|
1
1
|
# News
|
2
2
|
|
3
|
+
## 1.1.4: 2019-02-26
|
4
|
+
|
5
|
+
### Improvements
|
6
|
+
|
7
|
+
* Added support for decomposer selection by score.
|
8
|
+
|
9
|
+
* Added support for Office Open XML.
|
10
|
+
|
11
|
+
* Added support for OpenDocument.
|
12
|
+
|
13
|
+
* `chupa-text`: Added `--mime-boundary` option.
|
14
|
+
|
3
15
|
## 1.1.3: 2018-07-18
|
4
16
|
|
5
17
|
### Improvements
|
@@ -46,6 +46,7 @@ module ChupaText
|
|
46
46
|
@uri = nil
|
47
47
|
@mime_type = nil
|
48
48
|
@format = :json
|
49
|
+
@mime_formatter_options = {}
|
49
50
|
@need_screenshot = true
|
50
51
|
@expected_screenshot_size = [200, 200]
|
51
52
|
end
|
@@ -127,6 +128,11 @@ module ChupaText
|
|
127
128
|
"(default: #{@format})") do |format|
|
128
129
|
@format = format
|
129
130
|
end
|
131
|
+
parser.on("--mime-boundary=BOUNDARY",
|
132
|
+
"Use BOUNDARY for MIME boundary.",
|
133
|
+
"(default: Use SHA1 digest of URI)") do |boundary|
|
134
|
+
@mime_formatter_options[:boundary] = boundary
|
135
|
+
end
|
130
136
|
parser.on("--[no-]need-screenshot",
|
131
137
|
"Generate screenshot if available.",
|
132
138
|
"(default: #{@need_screenshot})") do |boolean|
|
@@ -220,7 +226,7 @@ module ChupaText
|
|
220
226
|
when :text
|
221
227
|
Formatters::Text.new($stdout)
|
222
228
|
when :mime
|
223
|
-
Formatters::MIME.new($stdout)
|
229
|
+
Formatters::MIME.new($stdout, @mime_formatter_options)
|
224
230
|
end
|
225
231
|
end
|
226
232
|
end
|
@@ -30,6 +30,14 @@ module ChupaText
|
|
30
30
|
raise NotImplementedError, "must implement #{self.class}\##{__method__}"
|
31
31
|
end
|
32
32
|
|
33
|
+
# Scores how well this decomposer fits +data+.
#
# @param data the object handed on to +target?+.
# @return [Integer, nil] 0 when +target?+ accepts +data+, nil otherwise.
def target_score(data)
  return nil unless target?(data)

  0
end
|
40
|
+
|
33
41
|
def decompose(data)
|
34
42
|
raise NotImplementedError, "must implement #{self.class}\##{__method__}"
|
35
43
|
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
# Copyright (C) 2019 Kouhei Sutou <kou@clear-code.com>
|
2
|
+
#
|
3
|
+
# This library is free software; you can redistribute it and/or
|
4
|
+
# modify it under the terms of the GNU Lesser General Public
|
5
|
+
# License as published by the Free Software Foundation; either
|
6
|
+
# version 2.1 of the License, or (at your option) any later version.
|
7
|
+
#
|
8
|
+
# This library is distributed in the hope that it will be useful,
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
11
|
+
# Lesser General Public License for more details.
|
12
|
+
#
|
13
|
+
# You should have received a copy of the GNU Lesser General Public
|
14
|
+
# License along with this library; if not, write to the Free Software
|
15
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
16
|
+
|
17
|
+
require "chupa-text/decomposers/office-open-xml"
|
18
|
+
|
19
|
+
module ChupaText
|
20
|
+
module Decomposers
|
21
|
+
class OfficeOpenXMLDocument < OfficeOpenXML
|
22
|
+
registry.register("office-open-xml-document", self)
|
23
|
+
|
24
|
+
# Records the file extensions, MIME types and XML namespace used for
# word-processing documents.
#
# @param options [Hash] passed on unchanged to the superclass initializer.
def initialize(options={})
  super
  @namespace_uri =
    "http://schemas.openxmlformats.org/wordprocessingml/2006/main"
  @extensions = %w[docx docm dotx dotm]
  @mime_types = %w[
    application/vnd.openxmlformats-officedocument.wordprocessingml.document
    application/vnd.ms-word.document.macroEnabled.12
    application/vnd.openxmlformats-officedocument.wordprocessingml.template
    application/vnd.ms-word.template.macroEnabled.12
  ]
end
|
41
|
+
|
42
|
+
private
|
43
|
+
# Handles one archive entry: only "word/document.xml" is of interest,
# and its text is appended to context[:text].
#
# @param entry an archive entry responding to +zip_path+.
# @param context [Hash] the shared decomposition state.
# @return the result of +extract_text+, or nil for any other entry.
def process_entry(entry, context)
  return unless entry.zip_path == "word/document.xml"

  extract_text(entry, context[:text])
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
# Copyright (C) 2019 Kouhei Sutou <kou@clear-code.com>
|
2
|
+
#
|
3
|
+
# This library is free software; you can redistribute it and/or
|
4
|
+
# modify it under the terms of the GNU Lesser General Public
|
5
|
+
# License as published by the Free Software Foundation; either
|
6
|
+
# version 2.1 of the License, or (at your option) any later version.
|
7
|
+
#
|
8
|
+
# This library is distributed in the hope that it will be useful,
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
11
|
+
# Lesser General Public License for more details.
|
12
|
+
#
|
13
|
+
# You should have received a copy of the GNU Lesser General Public
|
14
|
+
# License along with this library; if not, write to the Free Software
|
15
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
16
|
+
|
17
|
+
require "chupa-text/decomposers/office-open-xml"
|
18
|
+
|
19
|
+
module ChupaText
|
20
|
+
module Decomposers
|
21
|
+
class OfficeOpenXMLPresentation < OfficeOpenXML
|
22
|
+
registry.register("office-open-xml-presentation", self)
|
23
|
+
|
24
|
+
# Records the file extensions, MIME types and XML namespace used for
# presentation documents.
#
# @param options [Hash] passed on unchanged to the superclass initializer.
def initialize(options={})
  super
  @namespace_uri =
    "http://schemas.openxmlformats.org/drawingml/2006/main"
  @extensions = %w[pptx pptm ppsx ppsm potx potm sldx sldm]
  @mime_types = %w[
    application/vnd.openxmlformats-officedocument.presentationml.presentation
    application/vnd.ms-powerpoint.presentation.macroEnabled.12
    application/vnd.openxmlformats-officedocument.presentationml.slideshow
    application/vnd.ms-powerpoint.slideshow.macroEnabled.12
    application/vnd.openxmlformats-officedocument.presentationml.template
    application/vnd.ms-powerpoint.template.macroEnabled.12
    application/vnd.openxmlformats-officedocument.presentationml.slide
    application/vnd.ms-powerpoint.slide.macroEnabled.12
  ]
end
|
49
|
+
|
50
|
+
private
|
51
|
+
# Handles one archive entry: for "ppt/slides/slideN.xml" the slide text is
# extracted and stored, together with N, in context[:slides].
#
# The pattern is anchored at both ends (\A ... \z), like the worksheet
# pattern of the workbook decomposer, so that paths which merely start
# with a slide name (e.g. "slide1.xml.bak") are not taken for slides.
#
# @param entry an archive entry responding to +zip_path+.
# @param context [Hash] the shared decomposition state.
# @return [Array, nil] context[:slides] after the append, or nil when the
#   entry is not a slide.
def process_entry(entry, context)
  case entry.zip_path
  when /\Appt\/slides\/slide(\d+)\.xml\z/
    nth_slide = Integer($1, 10)
    slide_text = ""
    extract_text(entry, slide_text)
    context[:slides] ||= []
    context[:slides] << [nth_slide, slide_text]
  end
end
|
61
|
+
|
62
|
+
# Joins the collected slide texts, ordered by slide number, with "\n".
#
# context[:slides] is only created when a slide entry is processed, so
# it is missing for an archive without slides; that case yields "".
#
# @param context [Hash] the shared decomposition state; :slides holds
#   [slide number, text] pairs.
# @return [String] the text of all slides.
def accumulate_text(context)
  slides = context[:slides] || []
  slides.sort_by(&:first).collect(&:last).join("\n")
end
|
65
|
+
end
|
66
|
+
end
|
67
|
+
end
|
@@ -0,0 +1,114 @@
|
|
1
|
+
# Copyright (C) 2019 Kouhei Sutou <kou@clear-code.com>
|
2
|
+
#
|
3
|
+
# This library is free software; you can redistribute it and/or
|
4
|
+
# modify it under the terms of the GNU Lesser General Public
|
5
|
+
# License as published by the Free Software Foundation; either
|
6
|
+
# version 2.1 of the License, or (at your option) any later version.
|
7
|
+
#
|
8
|
+
# This library is distributed in the hope that it will be useful,
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
11
|
+
# Lesser General Public License for more details.
|
12
|
+
#
|
13
|
+
# You should have received a copy of the GNU Lesser General Public
|
14
|
+
# License along with this library; if not, write to the Free Software
|
15
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
16
|
+
|
17
|
+
require "chupa-text/decomposers/office-open-xml"
|
18
|
+
|
19
|
+
module ChupaText
|
20
|
+
module Decomposers
|
21
|
+
class OfficeOpenXMLWorkbook < OfficeOpenXML
|
22
|
+
registry.register("office-open-xml-workbook", self)
|
23
|
+
|
24
|
+
# Records the file extensions, MIME types and XML namespace used for
# spreadsheet documents.
#
# @param options [Hash] passed on unchanged to the superclass initializer.
def initialize(options={})
  super
  @namespace_uri =
    "http://schemas.openxmlformats.org/spreadsheetml/2006/main"
  @extensions = %w[xlsx xlsm xltx xltm]
  @mime_types = %w[
    application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
    application/vnd.ms-excel.sheet.macroEnabled.12
    application/vnd.openxmlformats-officedocument.spreadsheetml.template
    application/vnd.ms-excel.template.macroEnabled.12
  ]
end
|
41
|
+
|
42
|
+
private
|
43
|
+
# Handles one archive entry.
#
# * "xl/sharedStrings.xml": its texts are collected into a fresh array
#   stored as context[:shared_strings].
# * "xl/worksheets/sheetN.xml": the sheet is parsed with a SheetListener
#   and stored, together with N, in context[:sheets].
#
# NOTE(review): the shared strings are later looked up by position, so this
# presumably relies on extract_text appending exactly one element per
# shared string -- confirm for entries made of several text runs.
#
# @param entry an archive entry responding to +zip_path+ and +file_data+.
# @param context [Hash] the shared decomposition state.
# @return the result of the branch taken, or nil for any other entry.
def process_entry(entry, context)
  path = entry.zip_path
  if path == "xl/sharedStrings.xml"
    strings = []
    context[:shared_strings] = strings
    extract_text(entry, strings)
  elsif (matched = /\Axl\/worksheets\/sheet(\d+)\.xml\z/.match(path))
    number = Integer(matched[1], 10)
    rows = []
    parse(entry.file_data, SheetListener.new(rows))
    (context[:sheets] ||= []) << [number, rows]
  end
end
|
57
|
+
|
58
|
+
# Builds the workbook text: sheets are ordered by sheet number, every row
# becomes one line whose cells are joined with "\t", and sheets are
# separated by "\n".
#
# context[:sheets] and context[:shared_strings] are only set when the
# matching archive entries were seen, so both default to an empty list
# here instead of failing on nil.
#
# @param context [Hash] the shared decomposition state; :sheets holds
#   [sheet number, rows] pairs where each row is a list of indexes into
#   :shared_strings.
# @return [String] the text of all sheets.
def accumulate_text(context)
  shared_strings = context[:shared_strings] || []
  sheets = (context[:sheets] || []).sort_by(&:first).collect(&:last)
  sheet_texts = sheets.collect do |sheet|
    row_lines = sheet.collect do |row|
      row.collect { |index| shared_strings[index] }.join("\t") + "\n"
    end
    row_lines.join
  end
  sheet_texts.join("\n")
end
|
73
|
+
|
74
|
+
# SAX listener that turns a worksheet part into rows of shared-string
# indexes: every <row> starts a new list in +sheet+, and the <v> value of
# every shared-string cell is appended to the current row as an Integer.
#
# Only cells whose "t" attribute is "s" hold a shared-string index in <v>.
# Other cells (numbers, booleans, formula results, ...) hold the value
# itself, so they are skipped rather than being looked up as an index or
# failing the Integer conversion.
class SheetListener
  include REXML::SAX2Listener

  URI = "http://schemas.openxmlformats.org/spreadsheetml/2006/main"

  # @param sheet [Array] receives one Array of Integer per row.
  def initialize(sheet)
    @sheet = sheet
    @shared_string_cell = false
    @in_v = false
  end

  def start_element(uri, local_name, qname, attributes)
    return unless uri == URI
    case local_name
    when "row"
      @sheet << []
    when "c"
      # Remember the cell type for the <v> element nested in this cell.
      @shared_string_cell = (attributes["t"] == "s")
    when "v"
      @in_v = true
    end
  end

  def end_element(uri, local_name, qname)
    @in_v = false
    @shared_string_cell = false if uri == URI and local_name == "c"
  end

  def characters(text)
    add_column(text)
  end

  def cdata(content)
    add_column(content)
  end

  private
  # Appends +text+ as an index to the current row when it is the value of
  # a shared-string cell.
  def add_column(text)
    return unless @in_v
    return unless @shared_string_cell
    @sheet.last << Integer(text, 10)
  end
end
|
112
|
+
end
|
113
|
+
end
|
114
|
+
end
|
@@ -0,0 +1,196 @@
|
|
1
|
+
# Copyright (C) 2019 Kouhei Sutou <kou@clear-code.com>
|
2
|
+
#
|
3
|
+
# This library is free software; you can redistribute it and/or
|
4
|
+
# modify it under the terms of the GNU Lesser General Public
|
5
|
+
# License as published by the Free Software Foundation; either
|
6
|
+
# version 2.1 of the License, or (at your option) any later version.
|
7
|
+
#
|
8
|
+
# This library is distributed in the hope that it will be useful,
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
11
|
+
# Lesser General Public License for more details.
|
12
|
+
#
|
13
|
+
# You should have received a copy of the GNU Lesser General Public
|
14
|
+
# License along with this library; if not, write to the Free Software
|
15
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
16
|
+
|
17
|
+
require "cgi/util"
|
18
|
+
require "rexml/parsers/sax2parser"
|
19
|
+
require "rexml/sax2listener"
|
20
|
+
|
21
|
+
require "archive/zip"
|
22
|
+
|
23
|
+
module ChupaText
|
24
|
+
module Decomposers
|
25
|
+
class OfficeOpenXML < Decomposer
|
26
|
+
# Tells whether +data+ is handled by this decomposer.
#
# @param data an object responding to +extension+ and +mime_type+.
# @return [Boolean] true when the extension is listed in @extensions or
#   the MIME type is listed in @mime_types.
def target?(data)
  return true if @extensions.include?(data.extension)

  @mime_types.include?(data.mime_type)
end
|
30
|
+
|
31
|
+
# Scores how well this decomposer fits +data+.
#
# @param data the object handed on to +target?+.
# @return [Integer, nil] -1 when +target?+ accepts +data+, nil otherwise.
def target_score(data)
  return nil unless target?(data)

  -1
end
|
38
|
+
|
39
|
+
# Extracts text and attributes from the zip archive behind +data+.
#
# Every file entry is visited once: "docProps/app.xml" and
# "docProps/core.xml" feed the attributes, all other entries are passed to
# +process_entry+. The text returned by +accumulate_text+ is wrapped in a
# TextData that also receives the collected attributes.
#
# @param data the source data; must respond to +open+ and yield an input
#   readable by Archive::Zip.
# @yieldparam text_data [TextData] the extracted text with its attributes.
def decompose(data)
  context = {
    text: "",
    attributes: {},
  }
  property_paths = ["docProps/app.xml", "docProps/core.xml"]
  data.open do |input|
    Archive::Zip.open(input) do |zip|
      zip.each do |entry|
        next unless entry.file?
        if property_paths.include?(entry.zip_path)
          listener = AttributesListener.new(context[:attributes])
          parse(entry.file_data, listener)
        else
          process_entry(entry, context)
        end
      end
    end
  end
  text_data = TextData.new(accumulate_text(context), source_data: data)
  context[:attributes].each { |name, value| text_data[name] = value }
  yield(text_data)
end
|
68
|
+
|
69
|
+
private
|
70
|
+
# Reads +io+ completely and runs a REXML SAX2 parse over it, reporting
# the events to +listener+.
#
# @param io an object responding to +read+ that returns the XML text.
# @param listener the SAX2 listener that receives the parse events.
def parse(io, listener)
  xml = io.read
  sax_parser = REXML::Parsers::SAX2Parser.new(REXML::Source.new(xml))
  sax_parser.listen(listener)
  sax_parser.parse
end
|
76
|
+
|
77
|
+
# Parses the XML of +entry+ with a TextListener bound to @namespace_uri,
# which appends the text it finds to +texts+.
#
# @param entry an archive entry responding to +file_data+.
# @param texts the output the listener appends to (anything supporting <<).
# @return the result of +parse+.
def extract_text(entry, texts)
  parse(entry.file_data, TextListener.new(texts, @namespace_uri))
end
|
81
|
+
|
82
|
+
# Returns the final text of the document. By default this is simply the
# text collected in context[:text].
#
# @param context [Hash] the shared decomposition state.
# @return the value stored under :text.
def accumulate_text(context)
  collected_text = context[:text]
  collected_text
end
|
85
|
+
|
86
|
+
# SAX listener that collects the text of <t> elements in the target
# namespace into +output+, and appends "\n" whenever a <p> or <br>
# element of that namespace ends.
class TextListener
  include REXML::SAX2Listener

  # @param output the collector the text is appended to with <<.
  # @param target_uri [String] the namespace whose elements are examined.
  def initialize(output, target_uri)
    @output = output
    @target_uri = target_uri
    @in_target = false
  end

  def start_element(uri, local_name, qname, attributes)
    return unless uri == @target_uri
    case local_name
    when "t"
      @in_target = true
    end
  end

  def end_element(uri, local_name, qname)
    # Any closing tag ends the current text run.
    @in_target = false

    return unless uri == @target_uri
    case local_name
    when "p", "br"
      @output << "\n"
    end
  end

  # Character data may still carry entity references such as "&amp;",
  # so it is unescaped before being stored.
  def characters(text)
    return unless @in_target
    add_text(CGI.unescapeHTML(text))
  end

  # CDATA content is literal text: "&amp;" inside a CDATA section means
  # those five characters, so it is stored without unescaping.
  def cdata(content)
    add_text(content)
  end

  private
  # Appends already decoded +text+ when inside a target element.
  def add_text(text)
    return unless @in_target
    @output << text
  end
end
|
127
|
+
|
128
|
+
# SAX listener that copies selected document properties into the given
# attributes hash:
#
# * core properties: "keywords"
# * extended properties: "Application" (stored as "application")
# * Dublin Core: "description", "title", "subject"
# * Dublin Core terms: "created", "modified" (stored as "created_time" and
#   "modified_time", converted with Time.xmlschema)
class AttributesListener
  include REXML::SAX2Listener

  CORE_PROPERTIES_URI =
    "http://schemas.openxmlformats.org/package/2006/metadata/core-properties"
  EXTENDED_PROPERTIES_URI =
    "http://schemas.openxmlformats.org/officeDocument/2006/extended-properties"
  DUBLIN_CORE_URI = "http://purl.org/dc/elements/1.1/"
  DUBLIN_CORE_TERMS_URI = "http://purl.org/dc/terms/"

  # @param attributes [Hash] receives the attribute names and values.
  def initialize(attributes)
    @attributes = attributes
    @name = nil
    @type = nil
  end

  # Remembers under which name (and with which conversion) the text of
  # the element that just started has to be stored.
  def start_element(uri, local_name, qname, attributes)
    case uri
    when CORE_PROPERTIES_URI
      case local_name
      when "keywords"
        @name = local_name
      end
    when EXTENDED_PROPERTIES_URI
      case local_name
      when "Application"
        @name = local_name.downcase
      end
    when DUBLIN_CORE_URI
      case local_name
      when "description", "title", "subject"
        @name = local_name
      end
    when DUBLIN_CORE_TERMS_URI
      case local_name
      when "created", "modified"
        @name = "#{local_name}_time"
        @type = :w3cdtf
      end
    end
  end

  def end_element(uri, local_name, qname)
    @name = nil
    @type = nil
  end

  def characters(text)
    set_attribute(text)
  end

  # CDATA content is literal text, so it is stored without the entity
  # unescaping that is applied to ordinary character data.
  def cdata(content)
    return if @name.nil?

    store_attribute(content)
  end

  # Stores +value+, after unescaping entity references, under the name
  # selected by the current element. Does nothing outside such an element.
  def set_attribute(value)
    return if @name.nil?

    store_attribute(CGI.unescapeHTML(value))
  end

  private
  # Applies the conversion selected by the current element and stores the
  # already decoded +value+.
  def store_attribute(value)
    case @type
    when :w3cdtf
      value = Time.xmlschema(value)
    end
    @attributes[@name] = value
  end
end
|
194
|
+
end
|
195
|
+
end
|
196
|
+
end
|