chupa-text 1.1.3 → 1.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. checksums.yaml +4 -4
  2. data/doc/text/news.md +12 -0
  3. data/lib/chupa-text/command/chupa-text.rb +7 -1
  4. data/lib/chupa-text/decomposer.rb +8 -0
  5. data/lib/chupa-text/decomposers/office-open-xml-document.rb +51 -0
  6. data/lib/chupa-text/decomposers/office-open-xml-presentation.rb +67 -0
  7. data/lib/chupa-text/decomposers/office-open-xml-workbook.rb +114 -0
  8. data/lib/chupa-text/decomposers/office-open-xml.rb +196 -0
  9. data/lib/chupa-text/decomposers/opendocument-presentation.rb +105 -0
  10. data/lib/chupa-text/decomposers/opendocument-spreadsheet.rb +134 -0
  11. data/lib/chupa-text/decomposers/opendocument-text.rb +89 -0
  12. data/lib/chupa-text/decomposers/opendocument.rb +139 -0
  13. data/lib/chupa-text/extractor.rb +8 -2
  14. data/lib/chupa-text/formatters/mime.rb +3 -2
  15. data/lib/chupa-text/version.rb +1 -1
  16. data/test/decomposers/test-office-open-xml-document.rb +144 -0
  17. data/test/decomposers/test-office-open-xml-presentation.rb +133 -0
  18. data/test/decomposers/test-office-open-xml-workbook.rb +138 -0
  19. data/test/decomposers/test-open-document-presentation.rb +136 -0
  20. data/test/decomposers/test-open-document-spreadsheet.rb +152 -0
  21. data/test/decomposers/test-open-document-text.rb +144 -0
  22. data/test/fixture/docx/attributes.docx +0 -0
  23. data/test/fixture/docx/multi-pages.docx +0 -0
  24. data/test/fixture/docx/one-page.docx +0 -0
  25. data/test/fixture/docx/special-characters.docx +0 -0
  26. data/test/fixture/odp/attributes.odp +0 -0
  27. data/test/fixture/odp/multi-slides.odp +0 -0
  28. data/test/fixture/odp/one-slide.odp +0 -0
  29. data/test/fixture/ods/attributes.ods +0 -0
  30. data/test/fixture/ods/multi-sheets.ods +0 -0
  31. data/test/fixture/ods/one-sheet.ods +0 -0
  32. data/test/fixture/odt/attributes.odt +0 -0
  33. data/test/fixture/odt/multi-pages.odt +0 -0
  34. data/test/fixture/odt/one-page.odt +0 -0
  35. data/test/fixture/odt/special-characters.odt +0 -0
  36. data/test/fixture/pptx/attributes.pptx +0 -0
  37. data/test/fixture/pptx/multi-slides.pptx +0 -0
  38. data/test/fixture/pptx/one-slide.pptx +0 -0
  39. data/test/fixture/xlsx/attributes.xlsx +0 -0
  40. data/test/fixture/xlsx/multi-sheets.xlsx +0 -0
  41. data/test/fixture/xlsx/one-sheet.xlsx +0 -0
  42. metadata +36 -2
File: data/test/decomposers/test-open-document-presentation.rb (new file)
@@ -0,0 +1,136 @@
1
+ # Copyright (C) 2019 Kouhei Sutou <kou@clear-code.com>
2
+ #
3
+ # This library is free software; you can redistribute it and/or
4
+ # modify it under the terms of the GNU Lesser General Public
5
+ # License as published by the Free Software Foundation; either
6
+ # version 2.1 of the License, or (at your option) any later version.
7
+ #
8
+ # This library is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11
+ # Lesser General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Lesser General Public
14
+ # License along with this library; if not, write to the Free Software
15
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
+
17
+ class TestDecomposersOpenDocumentPresentation < Test::Unit::TestCase
18
+ include Helper
19
+
20
+ def setup
21
+ @decomposer = ChupaText::Decomposers::OpenDocumentPresentation.new({})
22
+ end
23
+
24
+ def decompose(path)
25
+ data = ChupaText::InputData.new(path)
26
+ decomposed = []
27
+ @decomposer.decompose(data) do |decomposed_data|
28
+ decomposed << decomposed_data
29
+ end
30
+ decomposed
31
+ end
32
+
33
+ sub_test_case("#target_score") do
34
+ def test_extension
35
+ data = ChupaText::Data.new
36
+ data.body = ""
37
+ data.uri = "document.odp"
38
+ assert_equal(-1, @decomposer.target_score(data))
39
+ end
40
+
41
+ def test_mime_type
42
+ data = ChupaText::Data.new
43
+ data.mime_type = "application/vnd.oasis.opendocument.presentation"
44
+ assert_equal(-1, @decomposer.target_score(data))
45
+ end
46
+ end
47
+
48
+ sub_test_case("#decompose") do
49
+ sub_test_case("attributes") do
50
+ def decompose(attribute_name)
51
+ super(fixture_path("odp", "attributes.odp")).first[attribute_name]
52
+ end
53
+
54
+ def test_title
55
+ assert_equal("Title", decompose("title"))
56
+ end
57
+
58
+ def test_subject
59
+ assert_equal("Subject", decompose("subject"))
60
+ end
61
+
62
+ def test_keywords
63
+ assert_equal(["Keyword1", "Keyword2"], decompose("keywords"))
64
+ end
65
+
66
+ def test_created_time
67
+ assert_equal(Time,
68
+ decompose("created_time").class)
69
+ end
70
+
71
+ def test_modified_time
72
+ assert_equal(Time,
73
+ decompose("modified_time").class)
74
+ end
75
+
76
+ def test_generator
77
+ assert_equal("LibreOffice",
78
+ normalize_generator(decompose("generator")))
79
+ end
80
+
81
+ def normalize_generator(generator)
82
+ if generator.start_with?("LibreOffice")
83
+ "LibreOffice"
84
+ else
85
+ generator
86
+ end
87
+ end
88
+ end
89
+
90
+ sub_test_case("one slide") do
91
+ def decompose
92
+ super(fixture_path("odp", "one-slide.odp"))
93
+ end
94
+
95
+ def test_body
96
+ assert_equal([
97
+ [nil, ""],
98
+ [
99
+ 0,
100
+ "Slide1 title\n" +
101
+ "Slide1 content\n",
102
+ ],
103
+ ],
104
+ decompose.collect {|data| [data["index"], data.body]})
105
+ end
106
+ end
107
+
108
+ sub_test_case("multi slides") do
109
+ def decompose
110
+ super(fixture_path("odp", "multi-slides.odp"))
111
+ end
112
+
113
+ def test_body
114
+ assert_equal([
115
+ [nil, ""],
116
+ [
117
+ 0,
118
+ "Slide1 title\n" +
119
+ "Slide1 content\n",
120
+ ],
121
+ [
122
+ 1,
123
+ "Slide2 title\n" +
124
+ "Slide2 content\n",
125
+ ],
126
+ [
127
+ 2,
128
+ "Slide3 title\n" +
129
+ "Slide3 content\n",
130
+ ],
131
+ ],
132
+ decompose.collect {|data| [data["index"], data.body]})
133
+ end
134
+ end
135
+ end
136
+ end
File: data/test/decomposers/test-open-document-spreadsheet.rb (new file)
@@ -0,0 +1,152 @@
1
+ # Copyright (C) 2019 Kouhei Sutou <kou@clear-code.com>
2
+ #
3
+ # This library is free software; you can redistribute it and/or
4
+ # modify it under the terms of the GNU Lesser General Public
5
+ # License as published by the Free Software Foundation; either
6
+ # version 2.1 of the License, or (at your option) any later version.
7
+ #
8
+ # This library is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11
+ # Lesser General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Lesser General Public
14
+ # License along with this library; if not, write to the Free Software
15
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
+
17
+ class TestDecomposersOpenDocumentSpreadsheet < Test::Unit::TestCase
18
+ include Helper
19
+
20
+ def setup
21
+ @decomposer = ChupaText::Decomposers::OpenDocumentSpreadsheet.new({})
22
+ end
23
+
24
+ def decompose(path)
25
+ data = ChupaText::InputData.new(path)
26
+ decomposed = []
27
+ @decomposer.decompose(data) do |decomposed_data|
28
+ decomposed << decomposed_data
29
+ end
30
+ decomposed
31
+ end
32
+
33
+ sub_test_case("#target_score") do
34
+ def test_extension
35
+ data = ChupaText::Data.new
36
+ data.body = ""
37
+ data.uri = "document.ods"
38
+ assert_equal(-1, @decomposer.target_score(data))
39
+ end
40
+
41
+ def test_mime_type
42
+ data = ChupaText::Data.new
43
+ data.mime_type = "application/vnd.oasis.opendocument.spreadsheet"
44
+ assert_equal(-1, @decomposer.target_score(data))
45
+ end
46
+ end
47
+
48
+ sub_test_case("#decompose") do
49
+ sub_test_case("attributes") do
50
+ def decompose(attribute_name)
51
+ super(fixture_path("ods", "attributes.ods")).first[attribute_name]
52
+ end
53
+
54
+ def test_title
55
+ assert_equal("Title", decompose("title"))
56
+ end
57
+
58
+ def test_subject
59
+ assert_equal("Subject", decompose("subject"))
60
+ end
61
+
62
+ def test_keywords
63
+ assert_equal(["Keyword1", "Keyword2"], decompose("keywords"))
64
+ end
65
+
66
+ def test_created_time
67
+ assert_equal(Time,
68
+ decompose("created_time").class)
69
+ end
70
+
71
+ def test_modified_time
72
+ assert_equal(Time,
73
+ decompose("modified_time").class)
74
+ end
75
+
76
+ def test_generator
77
+ assert_equal("LibreOffice",
78
+ normalize_generator(decompose("generator")))
79
+ end
80
+
81
+ def normalize_generator(generator)
82
+ if generator.start_with?("LibreOffice")
83
+ "LibreOffice"
84
+ else
85
+ generator
86
+ end
87
+ end
88
+ end
89
+
90
+ sub_test_case("one sheet") do
91
+ def decompose
92
+ super(fixture_path("ods", "one-sheet.ods")).collect do |data|
93
+ [
94
+ data["index"],
95
+ data["name"],
96
+ data.body,
97
+ ]
98
+ end
99
+ end
100
+
101
+ def test_body
102
+ assert_equal([
103
+ [nil, nil, ""],
104
+ [
105
+ 0,
106
+ "Sheet1",
107
+ "Sheet1 - A1\tSheet1 - B1\n" +
108
+ "Sheet1 - A2\tSheet1 - B2\n",
109
+ ],
110
+ ],
111
+ decompose)
112
+ end
113
+ end
114
+
115
+ sub_test_case("multi sheets") do
116
+ def decompose
117
+ super(fixture_path("ods", "multi-sheets.ods")).collect do |data|
118
+ [
119
+ data["index"],
120
+ data["name"],
121
+ data.body,
122
+ ]
123
+ end
124
+ end
125
+
126
+ def test_body
127
+ assert_equal([
128
+ [nil, nil, ""],
129
+ [
130
+ 0,
131
+ "Sheet1",
132
+ "Sheet1 - A1\tSheet1 - B1\n" +
133
+ "Sheet1 - A2\tSheet1 - B2\n",
134
+ ],
135
+ [
136
+ 1,
137
+ "Sheet2",
138
+ "Sheet2 - A1\tSheet2 - B1\n" +
139
+ "Sheet2 - A2\tSheet2 - B2\n",
140
+ ],
141
+ [
142
+ 2,
143
+ "Sheet3",
144
+ "Sheet3 - A1\tSheet3 - B1\n" +
145
+ "Sheet3 - A2\tSheet3 - B2\n",
146
+ ],
147
+ ],
148
+ decompose)
149
+ end
150
+ end
151
+ end
152
+ end
File: data/test/decomposers/test-open-document-text.rb (new file)
@@ -0,0 +1,144 @@
1
+ # Copyright (C) 2019 Kouhei Sutou <kou@clear-code.com>
2
+ #
3
+ # This library is free software; you can redistribute it and/or
4
+ # modify it under the terms of the GNU Lesser General Public
5
+ # License as published by the Free Software Foundation; either
6
+ # version 2.1 of the License, or (at your option) any later version.
7
+ #
8
+ # This library is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11
+ # Lesser General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Lesser General Public
14
+ # License along with this library; if not, write to the Free Software
15
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
+
17
+ class TestDecomposersOpenDocumentText < Test::Unit::TestCase
18
+ include Helper
19
+
20
+ def setup
21
+ @decomposer = ChupaText::Decomposers::OpenDocumentText.new({})
22
+ end
23
+
24
+ def decompose(path)
25
+ data = ChupaText::InputData.new(path)
26
+ decomposed = []
27
+ @decomposer.decompose(data) do |decomposed_data|
28
+ decomposed << decomposed_data
29
+ end
30
+ decomposed
31
+ end
32
+
33
+ sub_test_case("#target_score") do
34
+ def test_extension
35
+ data = ChupaText::Data.new
36
+ data.body = ""
37
+ data.uri = "document.odt"
38
+ assert_equal(-1, @decomposer.target_score(data))
39
+ end
40
+
41
+ def test_mime_type
42
+ data = ChupaText::Data.new
43
+ data.mime_type = "application/vnd.oasis.opendocument.text"
44
+ assert_equal(-1, @decomposer.target_score(data))
45
+ end
46
+ end
47
+
48
+ sub_test_case("#decompose") do
49
+ sub_test_case("attributes") do
50
+ def decompose(attribute_name)
51
+ super(fixture_path("odt", "attributes.odt")).collect do |data|
52
+ data[attribute_name]
53
+ end
54
+ end
55
+
56
+ def test_title
57
+ assert_equal(["Title"], decompose("title"))
58
+ end
59
+
60
+ def test_author
61
+ assert_equal([nil], decompose("author"))
62
+ end
63
+
64
+ def test_subject
65
+ assert_equal(["Subject"], decompose("subject"))
66
+ end
67
+
68
+ def test_keywords
69
+ assert_equal([["Keyword1", "Keyword2"]], decompose("keywords"))
70
+ end
71
+
72
+ def test_created_time
73
+ assert_equal([Time],
74
+ decompose("created_time").collect(&:class))
75
+ end
76
+
77
+ def test_modified_time
78
+ assert_equal([Time],
79
+ decompose("modified_time").collect(&:class))
80
+ end
81
+
82
+ def test_generator
83
+ assert_equal(["LibreOffice"],
84
+ normalize_generators(decompose("generator")))
85
+ end
86
+
87
+ def normalize_generators(generators)
88
+ generators.collect do |generator|
89
+ normalize_generator(generator)
90
+ end
91
+ end
92
+
93
+ def normalize_generator(generator)
94
+ if generator.start_with?("LibreOffice")
95
+ "LibreOffice"
96
+ else
97
+ generator
98
+ end
99
+ end
100
+
101
+ def test_creation_date
102
+ assert_equal([nil], decompose("creation_date"))
103
+ end
104
+ end
105
+
106
+ sub_test_case("one page") do
107
+ def decompose
108
+ super(fixture_path("odt", "one-page.odt"))
109
+ end
110
+
111
+ def test_body
112
+ assert_equal(["Page1\n"], decompose.collect(&:body))
113
+ end
114
+ end
115
+
116
+ sub_test_case("multi pages") do
117
+ def decompose
118
+ super(fixture_path("odt", "multi-pages.odt"))
119
+ end
120
+
121
+ def test_body
122
+ assert_equal([<<-BODY], decompose.collect(&:body))
123
+ Page1
124
+ Page2
125
+ BODY
126
+ end
127
+ end
128
+
129
+ sub_test_case("special characters") do
130
+ def decompose
131
+ super(fixture_path("odt", "special-characters.odt"))
132
+ end
133
+
134
+ def test_body
135
+ assert_equal([<<-BODY], decompose.collect(&:body))
136
+ Ampersand: &
137
+ Reference: &amp;
138
+ HTML: <a href="">
139
+ Single quote: ''
140
+ BODY
141
+ end
142
+ end
143
+ end
144
+ end