chupa-text 1.1.3 → 1.1.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/doc/text/news.md +12 -0
  3. data/lib/chupa-text/command/chupa-text.rb +7 -1
  4. data/lib/chupa-text/decomposer.rb +8 -0
  5. data/lib/chupa-text/decomposers/office-open-xml-document.rb +51 -0
  6. data/lib/chupa-text/decomposers/office-open-xml-presentation.rb +67 -0
  7. data/lib/chupa-text/decomposers/office-open-xml-workbook.rb +114 -0
  8. data/lib/chupa-text/decomposers/office-open-xml.rb +196 -0
  9. data/lib/chupa-text/decomposers/opendocument-presentation.rb +105 -0
  10. data/lib/chupa-text/decomposers/opendocument-spreadsheet.rb +134 -0
  11. data/lib/chupa-text/decomposers/opendocument-text.rb +89 -0
  12. data/lib/chupa-text/decomposers/opendocument.rb +139 -0
  13. data/lib/chupa-text/extractor.rb +8 -2
  14. data/lib/chupa-text/formatters/mime.rb +3 -2
  15. data/lib/chupa-text/version.rb +1 -1
  16. data/test/decomposers/test-office-open-xml-document.rb +144 -0
  17. data/test/decomposers/test-office-open-xml-presentation.rb +133 -0
  18. data/test/decomposers/test-office-open-xml-workbook.rb +138 -0
  19. data/test/decomposers/test-open-document-presentation.rb +136 -0
  20. data/test/decomposers/test-open-document-spreadsheet.rb +152 -0
  21. data/test/decomposers/test-open-document-text.rb +144 -0
  22. data/test/fixture/docx/attributes.docx +0 -0
  23. data/test/fixture/docx/multi-pages.docx +0 -0
  24. data/test/fixture/docx/one-page.docx +0 -0
  25. data/test/fixture/docx/special-characters.docx +0 -0
  26. data/test/fixture/odp/attributes.odp +0 -0
  27. data/test/fixture/odp/multi-slides.odp +0 -0
  28. data/test/fixture/odp/one-slide.odp +0 -0
  29. data/test/fixture/ods/attributes.ods +0 -0
  30. data/test/fixture/ods/multi-sheets.ods +0 -0
  31. data/test/fixture/ods/one-sheet.ods +0 -0
  32. data/test/fixture/odt/attributes.odt +0 -0
  33. data/test/fixture/odt/multi-pages.odt +0 -0
  34. data/test/fixture/odt/one-page.odt +0 -0
  35. data/test/fixture/odt/special-characters.odt +0 -0
  36. data/test/fixture/pptx/attributes.pptx +0 -0
  37. data/test/fixture/pptx/multi-slides.pptx +0 -0
  38. data/test/fixture/pptx/one-slide.pptx +0 -0
  39. data/test/fixture/xlsx/attributes.xlsx +0 -0
  40. data/test/fixture/xlsx/multi-sheets.xlsx +0 -0
  41. data/test/fixture/xlsx/one-sheet.xlsx +0 -0
  42. metadata +36 -2
@@ -0,0 +1,136 @@
1
+ # Copyright (C) 2019 Kouhei Sutou <kou@clear-code.com>
2
+ #
3
+ # This library is free software; you can redistribute it and/or
4
+ # modify it under the terms of the GNU Lesser General Public
5
+ # License as published by the Free Software Foundation; either
6
+ # version 2.1 of the License, or (at your option) any later version.
7
+ #
8
+ # This library is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11
+ # Lesser General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Lesser General Public
14
+ # License along with this library; if not, write to the Free Software
15
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
+
17
+ class TestDecomposersOpenDocumentPresentation < Test::Unit::TestCase
18
+ include Helper
19
+
20
+ def setup
21
+ @decomposer = ChupaText::Decomposers::OpenDocumentPresentation.new({})
22
+ end
23
+
24
+ def decompose(path)
25
+ data = ChupaText::InputData.new(path)
26
+ decomposed = []
27
+ @decomposer.decompose(data) do |decomposed_data|
28
+ decomposed << decomposed_data
29
+ end
30
+ decomposed
31
+ end
32
+
33
+ sub_test_case("#target_score") do
34
+ def test_extension
35
+ data = ChupaText::Data.new
36
+ data.body = ""
37
+ data.uri = "document.odp"
38
+ assert_equal(-1, @decomposer.target_score(data))
39
+ end
40
+
41
+ def test_mime_type
42
+ data = ChupaText::Data.new
43
+ data.mime_type = "application/vnd.oasis.opendocument.presentation"
44
+ assert_equal(-1, @decomposer.target_score(data))
45
+ end
46
+ end
47
+
48
+ sub_test_case("#decompose") do
49
+ sub_test_case("attributes") do
50
+ def decompose(attribute_name)
51
+ super(fixture_path("odp", "attributes.odp")).first[attribute_name]
52
+ end
53
+
54
+ def test_title
55
+ assert_equal("Title", decompose("title"))
56
+ end
57
+
58
+ def test_subject
59
+ assert_equal("Subject", decompose("subject"))
60
+ end
61
+
62
+ def test_keywords
63
+ assert_equal(["Keyword1", "Keyword2"], decompose("keywords"))
64
+ end
65
+
66
+ def test_created_time
67
+ assert_equal(Time,
68
+ decompose("created_time").class)
69
+ end
70
+
71
+ def test_modified_time
72
+ assert_equal(Time,
73
+ decompose("modified_time").class)
74
+ end
75
+
76
+ def test_generator
77
+ assert_equal("LibreOffice",
78
+ normalize_generator(decompose("generator")))
79
+ end
80
+
81
+ def normalize_generator(generator)
82
+ if generator.start_with?("LibreOffice")
83
+ "LibreOffice"
84
+ else
85
+ generator
86
+ end
87
+ end
88
+ end
89
+
90
+ sub_test_case("one slide") do
91
+ def decompose
92
+ super(fixture_path("odp", "one-slide.odp"))
93
+ end
94
+
95
+ def test_body
96
+ assert_equal([
97
+ [nil, ""],
98
+ [
99
+ 0,
100
+ "Slide1 title\n" +
101
+ "Slide1 content\n",
102
+ ],
103
+ ],
104
+ decompose.collect {|data| [data["index"], data.body]})
105
+ end
106
+ end
107
+
108
+ sub_test_case("multi slides") do
109
+ def decompose
110
+ super(fixture_path("odp", "multi-slides.odp"))
111
+ end
112
+
113
+ def test_body
114
+ assert_equal([
115
+ [nil, ""],
116
+ [
117
+ 0,
118
+ "Slide1 title\n" +
119
+ "Slide1 content\n",
120
+ ],
121
+ [
122
+ 1,
123
+ "Slide2 title\n" +
124
+ "Slide2 content\n",
125
+ ],
126
+ [
127
+ 2,
128
+ "Slide3 title\n" +
129
+ "Slide3 content\n",
130
+ ],
131
+ ],
132
+ decompose.collect {|data| [data["index"], data.body]})
133
+ end
134
+ end
135
+ end
136
+ end
@@ -0,0 +1,152 @@
1
+ # Copyright (C) 2019 Kouhei Sutou <kou@clear-code.com>
2
+ #
3
+ # This library is free software; you can redistribute it and/or
4
+ # modify it under the terms of the GNU Lesser General Public
5
+ # License as published by the Free Software Foundation; either
6
+ # version 2.1 of the License, or (at your option) any later version.
7
+ #
8
+ # This library is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11
+ # Lesser General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Lesser General Public
14
+ # License along with this library; if not, write to the Free Software
15
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
+
17
+ class TestDecomposersOpenDocumentSpreadsheet < Test::Unit::TestCase
18
+ include Helper
19
+
20
+ def setup
21
+ @decomposer = ChupaText::Decomposers::OpenDocumentSpreadsheet.new({})
22
+ end
23
+
24
+ def decompose(path)
25
+ data = ChupaText::InputData.new(path)
26
+ decomposed = []
27
+ @decomposer.decompose(data) do |decomposed_data|
28
+ decomposed << decomposed_data
29
+ end
30
+ decomposed
31
+ end
32
+
33
+ sub_test_case("#target_score") do
34
+ def test_extension
35
+ data = ChupaText::Data.new
36
+ data.body = ""
37
+ data.uri = "document.ods"
38
+ assert_equal(-1, @decomposer.target_score(data))
39
+ end
40
+
41
+ def test_mime_type
42
+ data = ChupaText::Data.new
43
+ data.mime_type = "application/vnd.oasis.opendocument.spreadsheet"
44
+ assert_equal(-1, @decomposer.target_score(data))
45
+ end
46
+ end
47
+
48
+ sub_test_case("#decompose") do
49
+ sub_test_case("attributes") do
50
+ def decompose(attribute_name)
51
+ super(fixture_path("ods", "attributes.ods")).first[attribute_name]
52
+ end
53
+
54
+ def test_title
55
+ assert_equal("Title", decompose("title"))
56
+ end
57
+
58
+ def test_subject
59
+ assert_equal("Subject", decompose("subject"))
60
+ end
61
+
62
+ def test_keywords
63
+ assert_equal(["Keyword1", "Keyword2"], decompose("keywords"))
64
+ end
65
+
66
+ def test_created_time
67
+ assert_equal(Time,
68
+ decompose("created_time").class)
69
+ end
70
+
71
+ def test_modified_time
72
+ assert_equal(Time,
73
+ decompose("modified_time").class)
74
+ end
75
+
76
+ def test_generator
77
+ assert_equal("LibreOffice",
78
+ normalize_generator(decompose("generator")))
79
+ end
80
+
81
+ def normalize_generator(generator)
82
+ if generator.start_with?("LibreOffice")
83
+ "LibreOffice"
84
+ else
85
+ generator
86
+ end
87
+ end
88
+ end
89
+
90
+ sub_test_case("one sheet") do
91
+ def decompose
92
+ super(fixture_path("ods", "one-sheet.ods")).collect do |data|
93
+ [
94
+ data["index"],
95
+ data["name"],
96
+ data.body,
97
+ ]
98
+ end
99
+ end
100
+
101
+ def test_body
102
+ assert_equal([
103
+ [nil, nil, ""],
104
+ [
105
+ 0,
106
+ "Sheet1",
107
+ "Sheet1 - A1\tSheet1 - B1\n" +
108
+ "Sheet1 - A2\tSheet1 - B2\n",
109
+ ],
110
+ ],
111
+ decompose)
112
+ end
113
+ end
114
+
115
+ sub_test_case("multi sheets") do
116
+ def decompose
117
+ super(fixture_path("ods", "multi-sheets.ods")).collect do |data|
118
+ [
119
+ data["index"],
120
+ data["name"],
121
+ data.body,
122
+ ]
123
+ end
124
+ end
125
+
126
+ def test_body
127
+ assert_equal([
128
+ [nil, nil, ""],
129
+ [
130
+ 0,
131
+ "Sheet1",
132
+ "Sheet1 - A1\tSheet1 - B1\n" +
133
+ "Sheet1 - A2\tSheet1 - B2\n",
134
+ ],
135
+ [
136
+ 1,
137
+ "Sheet2",
138
+ "Sheet2 - A1\tSheet2 - B1\n" +
139
+ "Sheet2 - A2\tSheet2 - B2\n",
140
+ ],
141
+ [
142
+ 2,
143
+ "Sheet3",
144
+ "Sheet3 - A1\tSheet3 - B1\n" +
145
+ "Sheet3 - A2\tSheet3 - B2\n",
146
+ ],
147
+ ],
148
+ decompose)
149
+ end
150
+ end
151
+ end
152
+ end
@@ -0,0 +1,144 @@
1
+ # Copyright (C) 2019 Kouhei Sutou <kou@clear-code.com>
2
+ #
3
+ # This library is free software; you can redistribute it and/or
4
+ # modify it under the terms of the GNU Lesser General Public
5
+ # License as published by the Free Software Foundation; either
6
+ # version 2.1 of the License, or (at your option) any later version.
7
+ #
8
+ # This library is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11
+ # Lesser General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Lesser General Public
14
+ # License along with this library; if not, write to the Free Software
15
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
+
17
+ class TestDecomposersOpenDocumentText < Test::Unit::TestCase
18
+ include Helper
19
+
20
+ def setup
21
+ @decomposer = ChupaText::Decomposers::OpenDocumentText.new({})
22
+ end
23
+
24
+ def decompose(path)
25
+ data = ChupaText::InputData.new(path)
26
+ decomposed = []
27
+ @decomposer.decompose(data) do |decomposed_data|
28
+ decomposed << decomposed_data
29
+ end
30
+ decomposed
31
+ end
32
+
33
+ sub_test_case("#target_score") do
34
+ def test_extension
35
+ data = ChupaText::Data.new
36
+ data.body = ""
37
+ data.uri = "document.odt"
38
+ assert_equal(-1, @decomposer.target_score(data))
39
+ end
40
+
41
+ def test_mime_type
42
+ data = ChupaText::Data.new
43
+ data.mime_type = "application/vnd.oasis.opendocument.text"
44
+ assert_equal(-1, @decomposer.target_score(data))
45
+ end
46
+ end
47
+
48
+ sub_test_case("#decompose") do
49
+ sub_test_case("attributes") do
50
+ def decompose(attribute_name)
51
+ super(fixture_path("odt", "attributes.odt")).collect do |data|
52
+ data[attribute_name]
53
+ end
54
+ end
55
+
56
+ def test_title
57
+ assert_equal(["Title"], decompose("title"))
58
+ end
59
+
60
+ def test_author
61
+ assert_equal([nil], decompose("author"))
62
+ end
63
+
64
+ def test_subject
65
+ assert_equal(["Subject"], decompose("subject"))
66
+ end
67
+
68
+ def test_keywords
69
+ assert_equal([["Keyword1", "Keyword2"]], decompose("keywords"))
70
+ end
71
+
72
+ def test_created_time
73
+ assert_equal([Time],
74
+ decompose("created_time").collect(&:class))
75
+ end
76
+
77
+ def test_modified_time
78
+ assert_equal([Time],
79
+ decompose("modified_time").collect(&:class))
80
+ end
81
+
82
+ def test_generator
83
+ assert_equal(["LibreOffice"],
84
+ normalize_generators(decompose("generator")))
85
+ end
86
+
87
+ def normalize_generators(generators)
88
+ generators.collect do |generator|
89
+ normalize_generator(generator)
90
+ end
91
+ end
92
+
93
+ def normalize_generator(generator)
94
+ if generator.start_with?("LibreOffice")
95
+ "LibreOffice"
96
+ else
97
+ generator
98
+ end
99
+ end
100
+
101
+ def test_creation_date
102
+ assert_equal([nil], decompose("creation_date"))
103
+ end
104
+ end
105
+
106
+ sub_test_case("one page") do
107
+ def decompose
108
+ super(fixture_path("odt", "one-page.odt"))
109
+ end
110
+
111
+ def test_body
112
+ assert_equal(["Page1\n"], decompose.collect(&:body))
113
+ end
114
+ end
115
+
116
+ sub_test_case("multi pages") do
117
+ def decompose
118
+ super(fixture_path("odt", "multi-pages.odt"))
119
+ end
120
+
121
+ def test_body
122
+ assert_equal([<<-BODY], decompose.collect(&:body))
123
+ Page1
124
+ Page2
125
+ BODY
126
+ end
127
+ end
128
+
129
+ sub_test_case("special characters") do
130
+ def decompose
131
+ super(fixture_path("odt", "special-characters.odt"))
132
+ end
133
+
134
+ def test_body
135
+ assert_equal([<<-BODY], decompose.collect(&:body))
136
+ Ampersand: &
137
+ Reference: &amp;
138
+ HTML: <a href="">
139
+ Single quote: ''
140
+ BODY
141
+ end
142
+ end
143
+ end
144
+ end