docsplit 0.3.4 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'docsplit'
3
- s.version = '0.3.4' # Keep version in sync with docsplit.rb
4
- s.date = '2010-8-20'
3
+ s.version = '0.4.0' # Keep version in sync with docsplit.rb
4
+ s.date = '2010-8-23'
5
5
 
6
6
  s.homepage = "http://documentcloud.github.com/docsplit/"
7
7
  s.summary = "Break Apart Documents into Images, Text, Pages and PDFs"
@@ -1,7 +1,7 @@
1
1
  # The Docsplit module delegates to the Java PDF extractors.
2
2
  module Docsplit
3
3
 
4
- VERSION = '0.3.4' # Keep in sync with gemspec.
4
+ VERSION = '0.4.0' # Keep in sync with gemspec.
5
5
 
6
6
  ROOT = File.expand_path(File.dirname(__FILE__) + '/..')
7
7
 
@@ -54,7 +54,8 @@ module Docsplit
54
54
  def self.extract_pdf(docs, opts={})
55
55
  [docs].flatten.each do |doc|
56
56
  basename = File.basename(doc, File.extname(doc))
57
- run "-jar #{ROOT}/vendor/jodconverter/jodconverter-cli-2.2.2.jar #{doc} #{opts[:output] || '.'}/#{basename}.pdf", [], {}
57
+ options = "-jar #{ROOT}/vendor/jodconverter/jodconverter-core-3.0-beta-3.jar -r #{ROOT}/vendor/conf/document-formats.js"
58
+ run "#{options} \"#{doc}\" \"#{opts[:output] || '.'}/#{basename}.pdf\"", [], {}
58
59
  end
59
60
  end
60
61
 
@@ -46,6 +46,7 @@ module Docsplit
46
46
  raise ExtractionFailed, result if $? != 0
47
47
  end
48
48
  end
49
+ ensure
49
50
  FileUtils.remove_entry_secure tempdir if File.exists?(tempdir)
50
51
  end
51
52
 
@@ -71,6 +71,7 @@ module Docsplit
71
71
  run "MAGICK_TMPDIR=#{tempdir} OMP_NUM_THREADS=2 gm convert #{MEMORY_ARGS} #{OCR_FLAGS} #{pdf} #{tiff} 2>&1"
72
72
  run "tesseract #{tiff} #{base_path} -l eng 2>&1"
73
73
  end
74
+ ensure
74
75
  FileUtils.remove_entry_secure tempdir if File.exists?(tempdir)
75
76
  end
76
77
 
@@ -0,0 +1,236 @@
1
+ //
2
+ // JODConverter Document Formats Configuration
3
+ //
4
+ [
5
+ {
6
+ "name": "Portable Document Format",
7
+ "extension": "pdf",
8
+ "mediaType": "application/pdf",
9
+ "storePropertiesByFamily": {
10
+ "DRAWING": {"FilterName": "draw_pdf_Export"},
11
+ "SPREADSHEET": {"FilterName": "calc_pdf_Export"},
12
+ "PRESENTATION": {"FilterName": "impress_pdf_Export"},
13
+ "TEXT": {"FilterName": "writer_pdf_Export"}
14
+ }
15
+ },
16
+ {
17
+ "name": "Macromedia Flash",
18
+ "extension": "swf",
19
+ "mediaType": "application/x-shockwave-flash",
20
+ "storePropertiesByFamily": {
21
+ "DRAWING": {"FilterName": "draw_flash_Export"},
22
+ "PRESENTATION": {"FilterName": "impress_flash_Export"}
23
+ }
24
+ },
25
+ {
26
+ "name": "HTML",
27
+ "extension": "html",
28
+ "mediaType": "text/html",
29
+ "inputFamily": "TEXT",
30
+ "storePropertiesByFamily": {
31
+ "SPREADSHEET": {"FilterName": "HTML (StarCalc)"},
32
+ "PRESENTATION": {"FilterName": "impress_html_Export"},
33
+ "TEXT": {"FilterName": "HTML (StarWriter)"}
34
+ }
35
+ },
36
+ {
37
+ "name": "OpenDocument Text",
38
+ "extension": "odt",
39
+ "mediaType": "application/vnd.oasis.opendocument.text",
40
+ "inputFamily": "TEXT",
41
+ "storePropertiesByFamily": {"TEXT": {"FilterName": "writer8"}}
42
+ },
43
+ {
44
+ "name": "OpenOffice.org 1.0 Text Document",
45
+ "extension": "sxw",
46
+ "mediaType": "application/vnd.sun.xml.writer",
47
+ "inputFamily": "TEXT",
48
+ "storePropertiesByFamily": {"TEXT": {"FilterName": "StarOffice XML (Writer)"}}
49
+ },
50
+ {
51
+ "name": "Microsoft Word",
52
+ "extension": "doc",
53
+ "mediaType": "application/msword",
54
+ "inputFamily": "TEXT",
55
+ "storePropertiesByFamily": {"TEXT": {"FilterName": "MS Word 97"}}
56
+ },
57
+ {
58
+ "name": "Microsoft Word 2007 XML",
59
+ "extension": "docx",
60
+ "mediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
61
+ "inputFamily": "TEXT"
62
+ },
63
+ {
64
+ "name": "Rich Text Format",
65
+ "extension": "rtf",
66
+ "mediaType": "text/rtf",
67
+ "inputFamily": "TEXT",
68
+ "storePropertiesByFamily": {"TEXT": {"FilterName": "Rich Text Format"}}
69
+ },
70
+ {
71
+ "name": "WordPerfect",
72
+ "extension": "wpd",
73
+ "mediaType": "application/wordperfect",
74
+ "inputFamily": "TEXT"
75
+ },
76
+ {
77
+ "name": "Plain Text",
78
+ "extension": "txt",
79
+ "mediaType": "text/plain",
80
+ "inputFamily": "TEXT",
81
+ "loadProperties": {
82
+ "FilterName": "Text (encoded)",
83
+ "FilterOptions": "utf8"
84
+ },
85
+ "storePropertiesByFamily": {"TEXT": {
86
+ "FilterName": "Text (encoded)",
87
+ "FilterOptions": "utf8"
88
+ }}
89
+ },
90
+ {
91
+ "name": "MediaWiki wikitext",
92
+ "extension": "wiki",
93
+ "mediaType": "text/x-wiki",
94
+ "storePropertiesByFamily": {"TEXT": {"FilterName": "MediaWiki"}}
95
+ },
96
+ {
97
+ "name": "OpenDocument Spreadsheet",
98
+ "extension": "ods",
99
+ "mediaType": "application/vnd.oasis.opendocument.spreadsheet",
100
+ "inputFamily": "SPREADSHEET",
101
+ "storePropertiesByFamily": {"SPREADSHEET": {"FilterName": "calc8"}}
102
+ },
103
+ {
104
+ "name": "OpenOffice.org 1.0 Spreadsheet",
105
+ "extension": "sxc",
106
+ "mediaType": "application/vnd.sun.xml.calc",
107
+ "inputFamily": "SPREADSHEET",
108
+ "storePropertiesByFamily": {"SPREADSHEET": {"FilterName": "StarOffice XML (Calc)"}}
109
+ },
110
+ {
111
+ "name": "Microsoft Excel",
112
+ "extension": "xls",
113
+ "mediaType": "application/vnd.ms-excel",
114
+ "inputFamily": "SPREADSHEET",
115
+ "storePropertiesByFamily": {"SPREADSHEET": {"FilterName": "MS Excel 97"}}
116
+ },
117
+ {
118
+ "name": "Microsoft Excel 2007 XML",
119
+ "extension": "xlsx",
120
+ "mediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
121
+ "inputFamily": "SPREADSHEET"
122
+ },
123
+ {
124
+ "name": "Comma Separated Values",
125
+ "extension": "csv",
126
+ "mediaType": "text/csv",
127
+ "inputFamily": "SPREADSHEET",
128
+ "loadProperties": {
129
+ "FilterName": "Text - txt - csv (StarCalc)",
130
+ "FilterOptions": "44,34,0"
131
+ },
132
+ "storePropertiesByFamily": {"SPREADSHEET": {
133
+ "FilterName": "Text - txt - csv (StarCalc)",
134
+ "FilterOptions": "44,34,0"
135
+ }}
136
+ },
137
+ {
138
+ "name": "Tab Separated Values",
139
+ "extension": "tsv",
140
+ "mediaType": "text/tab-separated-values",
141
+ "inputFamily": "SPREADSHEET",
142
+ "loadProperties": {
143
+ "FilterName": "Text - txt - csv (StarCalc)",
144
+ "FilterOptions": "9,34,0"
145
+ },
146
+ "storePropertiesByFamily": {"SPREADSHEET": {
147
+ "FilterName": "Text - txt - csv (StarCalc)",
148
+ "FilterOptions": "9,34,0"
149
+ }}
150
+ },
151
+ {
152
+ "name": "OpenDocument Presentation",
153
+ "extension": "odp",
154
+ "mediaType": "application/vnd.oasis.opendocument.presentation",
155
+ "inputFamily": "PRESENTATION",
156
+ "storePropertiesByFamily": {"PRESENTATION": {"FilterName": "impress8"}}
157
+ },
158
+ {
159
+ "name": "OpenOffice.org 1.0 Presentation",
160
+ "extension": "sxi",
161
+ "mediaType": "application/vnd.sun.xml.impress",
162
+ "inputFamily": "PRESENTATION",
163
+ "storePropertiesByFamily": {"PRESENTATION": {"FilterName": "StarOffice XML (Impress)"}}
164
+ },
165
+ {
166
+ "name": "Microsoft PowerPoint",
167
+ "extension": "ppt",
168
+ "mediaType": "application/vnd.ms-powerpoint",
169
+ "inputFamily": "PRESENTATION",
170
+ "storePropertiesByFamily": {"PRESENTATION": {"FilterName": "MS PowerPoint 97"}}
171
+ },
172
+ {
173
+ "name": "Microsoft PowerPoint 2007 XML",
174
+ "extension": "pptx",
175
+ "mediaType": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
176
+ "inputFamily": "PRESENTATION"
177
+ },
178
+ {
179
+ "name": "OpenDocument Drawing",
180
+ "extension": "odg",
181
+ "mediaType": "application/vnd.oasis.opendocument.graphics",
182
+ "inputFamily": "DRAWING",
183
+ "storePropertiesByFamily": {"DRAWING": {"FilterName": "draw8"}}
184
+ },
185
+ {
186
+ "name": "Scalable Vector Graphics",
187
+ "extension": "svg",
188
+ "mediaType": "image/svg+xml",
189
+ "storePropertiesByFamily": {"DRAWING": {"FilterName": "draw_svg_Export"}}
190
+ },
191
+ {
192
+ "name": "Portable Network Graphic",
193
+ "extension": "png",
194
+ "mediaType": "image/png",
195
+ "storePropertiesByFamily": {
196
+ "DRAWING": {"FilterName": "draw_png_Export"},
197
+ "PRESENTATION": {"FilterName": "impress_png_Export"}
198
+ }
199
+ },
200
+ {
201
+ "name": "Graphics Interchange Format",
202
+ "extension": "gif",
203
+ "mediaType": "image/gif",
204
+ "storePropertiesByFamily": {
205
+ "DRAWING": {"FilterName": "draw_gif_Export"},
206
+ "PRESENTATION": {"FilterName": "impress_gif_Export"}
207
+ }
208
+ },
209
+ {
210
+ "name": "Joint Photographic Experts Group",
211
+ "extension": "jpg",
212
+ "mediaType": "image/jpeg",
213
+ "storePropertiesByFamily": {
214
+ "DRAWING": {"FilterName": "draw_jpg_Export"},
215
+ "PRESENTATION": {"FilterName": "impress_jpg_Export"}
216
+ }
217
+ },
218
+ {
219
+ "name": "Windows Bitmap",
220
+ "extension": "bmp",
221
+ "mediaType": "image/bmp",
222
+ "storePropertiesByFamily": {
223
+ "DRAWING": {"FilterName": "draw_bmp_Export"},
224
+ "PRESENTATION": {"FilterName": "impress_bmp_Export"}
225
+ }
226
+ },
227
+ {
228
+ "name": "Tagged Image File Format",
229
+ "extension": "tif",
230
+ "mediaType": "image/tiff",
231
+ "storePropertiesByFamily": {
232
+ "DRAWING": {"FilterName": "draw_tif_Export"},
233
+ "PRESENTATION": {"FilterName": "impress_tif_Export"}
234
+ }
235
+ }
236
+ ]
File without changes
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: docsplit
3
3
  version: !ruby/object:Gem::Version
4
- hash: 27
4
+ hash: 15
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
- - 3
9
8
  - 4
10
- version: 0.3.4
9
+ - 0
10
+ version: 0.4.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Jeremy Ashkenas
@@ -16,7 +16,7 @@ autorequire:
16
16
  bindir: bin
17
17
  cert_chain: []
18
18
 
19
- date: 2010-08-20 00:00:00 -04:00
19
+ date: 2010-08-23 00:00:00 -04:00
20
20
  default_executable:
21
21
  dependencies: []
22
22
 
@@ -37,16 +37,15 @@ files:
37
37
  - lib/docsplit/transparent_pdfs.rb
38
38
  - lib/docsplit.rb
39
39
  - bin/docsplit
40
- - vendor/jodconverter/commons-cli-1.2.jar
40
+ - vendor/conf/document-formats.js
41
+ - vendor/jodconverter/commons-cli-1.1.jar
41
42
  - vendor/jodconverter/commons-io-1.4.jar
42
- - vendor/jodconverter/jodconverter-2.2.2.jar
43
- - vendor/jodconverter/jodconverter-cli-2.2.2.jar
44
- - vendor/jodconverter/juh-3.0.1.jar
45
- - vendor/jodconverter/jurt-3.0.1.jar
46
- - vendor/jodconverter/ridl-3.0.1.jar
47
- - vendor/jodconverter/slf4j-api-1.5.6.jar
48
- - vendor/jodconverter/slf4j-jdk14-1.5.6.jar
49
- - vendor/jodconverter/unoil-3.0.1.jar
43
+ - vendor/jodconverter/jodconverter-core-3.0-beta-3.jar
44
+ - vendor/jodconverter/json-20080701.jar
45
+ - vendor/jodconverter/juh-3.1.0.jar
46
+ - vendor/jodconverter/jurt-3.1.0.jar
47
+ - vendor/jodconverter/ridl-3.1.0.jar
48
+ - vendor/jodconverter/unoil-3.1.0.jar
50
49
  - vendor/logging.properties
51
50
  - docsplit.gemspec
52
51
  - LICENSE