pandoc-ruby 2.0.1 → 2.1.10

Sign up to get free protection for your applications and to get access to all the features.
data/lib/pandoc-ruby.rb CHANGED
@@ -3,66 +3,133 @@ require 'tempfile'
3
3
  require 'timeout'
4
4
 
5
5
  class PandocRuby
6
-
7
- @@pandoc_path = 'pandoc'
6
+ # Use the pandoc command with a custom executable path.
7
+ @pandoc_path = 'pandoc'
8
+ class << self
9
+ attr_accessor :pandoc_path
10
+ end
8
11
 
9
12
  # The available readers and their corresponding names. The keys are used to
10
13
  # generate methods and specify options to Pandoc.
11
14
  READERS = {
12
- 'native' => 'pandoc native',
13
- 'json' => 'pandoc JSON',
14
- 'markdown' => 'markdown',
15
- 'rst' => 'reStructuredText',
16
- 'textile' => 'textile',
17
- 'html' => 'HTML',
18
- 'latex' => 'LaTeX'
15
+ 'biblatex' => 'BibLaTeX bibliography',
16
+ 'bibtex' => 'BibTeX bibliography',
17
+ 'commonmark' => 'CommonMark Markdown',
18
+ 'commonmark_x' => 'CommonMark Markdown with extensions',
19
+ 'creole' => 'Creole 1.0',
20
+ 'csljson' => 'CSL JSON bibliography',
21
+ 'csv' => 'CSV table',
22
+ 'docbook' => 'DocBook',
23
+ 'docx' => 'Word docx',
24
+ 'dokuwiki' => 'DokuWiki markup',
25
+ 'endnotexml' => 'EndNote XML bibliography',
26
+ 'epub' => 'EPUB',
27
+ 'fb2' => 'FictionBook2 e-book',
28
+ 'gfm' => 'GitHub-Flavored Markdown',
29
+ 'haddock' => 'Haddock markup',
30
+ 'html' => 'HTML',
31
+ 'ipynb' => 'Jupyter notebook',
32
+ 'jats' => 'JATS XML',
33
+ 'jira' => 'Jira wiki markup',
34
+ 'json' => 'JSON version of native AST',
35
+ 'latex' => 'LaTex',
36
+ 'man' => 'roff man',
37
+ 'markdown' => "Pandoc's Markdown",
38
+ 'markdown_mmd' => 'MultiMarkdown',
39
+ 'markdown_phpextra' => 'PHP Markdown Extra',
40
+ 'markdown_strict' => 'original unextended Markdown',
41
+ 'mediawiki' => 'MediaWiki markup',
42
+ 'muse' => 'Muse',
43
+ 'native' => 'native Haskell',
44
+ 'odt' => 'ODT',
45
+ 'opml' => 'OPML',
46
+ 'org' => 'Emacs Org mode',
47
+ 'ris' => 'RIS bibliography',
48
+ 'rst' => 'reStructuredText',
49
+ 'rtf' => 'Rich Text Format',
50
+ 't2t' => 'txt2tags',
51
+ 'textile' => 'Textile',
52
+ 'tikiwiki' => 'TikiWiki markup',
53
+ 'tsv' => 'TSV table',
54
+ 'twiki' => 'TWiki markup',
55
+ 'vimwiki' => 'Vimwiki'
19
56
  }.freeze
20
57
 
21
58
  # The available string writers and their corresponding names. The keys are
22
59
  # used to generate methods and specify options to Pandoc.
23
60
  STRING_WRITERS = {
24
- 'native' => 'pandoc native',
25
- 'json' => 'pandoc JSON',
26
- 'html' => 'HTML',
27
- 'html5' => 'HTML5',
28
- 's5' => 'S5 HTML slideshow',
29
- 'slidy' => 'Slidy HTML slideshow',
30
- 'dzslides' => 'Dzslides HTML slideshow',
31
- 'docbook' => 'DocBook XML',
32
- 'opendocument' => 'OpenDocument XML',
33
- 'latex' => 'LaTeX',
34
- 'beamer' => 'Beamer PDF slideshow',
35
- 'context' => 'ConTeXt',
36
- 'texinfo' => 'GNU Texinfo',
37
- 'man' => 'groff man',
38
- 'markdown' => 'markdown',
39
- 'plain' => 'plain',
40
- 'rst' => 'reStructuredText',
41
- 'mediawiki' => 'MediaWiki markup',
42
- 'textile' => 'textile',
43
- 'rtf' => 'rich text format',
44
- 'org' => 'emacs org mode',
45
- 'asciidoc' => 'asciidoc'
61
+ 'asciidoc' => 'AsciiDoc',
62
+ 'asciidoctor' => 'AsciiDoctor',
63
+ 'beamer' => 'LaTeX beamer slide show',
64
+ 'biblatex' => 'BibLaTeX bibliography',
65
+ 'bibtex' => 'BibTeX bibliography',
66
+ 'chunkedhtml' => 'zip archive of multiple linked HTML files',
67
+ 'commonmark' => 'CommonMark Markdown',
68
+ 'commonmark_x' => 'CommonMark Markdown with extensions',
69
+ 'context' => 'ConTeXt',
70
+ 'csljson' => 'CSL JSON bibliography',
71
+ 'docbook' => 'DocBook 4',
72
+ 'docbook4' => 'DocBook 4',
73
+ 'docbook5' => 'DocBook 5',
74
+ 'dokuwiki' => 'DokuWiki markup',
75
+ 'fb2' => 'FictionBook2 e-book',
76
+ 'gfm' => 'GitHub-Flavored Markdown',
77
+ 'haddock' => 'Haddock markup',
78
+ 'html' => 'HTML, i.e. HTML5/XHTML polyglot markup',
79
+ 'html5' => 'HTML, i.e. HTML5/XHTML polyglot markup',
80
+ 'html4' => 'XHTML 1.0 Transitional',
81
+ 'icml' => 'InDesign ICML',
82
+ 'ipynb' => 'Jupyter notebook',
83
+ 'jats_archiving' => 'JATS XML, Archiving and Interchange Tag Set',
84
+ 'jats_articleauthoring' => 'JATS XML, Article Authoring Tag Set',
85
+ 'jats_publishing' => 'JATS XML, Journal Publishing Tag Set',
86
+ 'jats' => 'alias for jats_archiving',
87
+ 'jira' => 'Jira wiki markup',
88
+ 'json' => 'JSON version of native AST',
89
+ 'latex' => 'LaTex',
90
+ 'man' => 'roff man',
91
+ 'markdown' => "Pandoc's Markdown",
92
+ 'markdown_mmd' => 'MultiMarkdown',
93
+ 'markdown_phpextra' => 'PHP Markdown Extra',
94
+ 'markdown_strict' => 'original unextended Markdown',
95
+ 'markua' => 'Markua',
96
+ 'mediawiki' => 'MediaWiki markup',
97
+ 'ms' => 'roff ms',
98
+ 'muse' => 'Muse',
99
+ 'native' => 'native Haskell',
100
+ 'opml' => 'OPML',
101
+ 'opendocument' => 'OpenDocument',
102
+ 'org' => 'Emacs Org mode',
103
+ 'pdf' => 'PDF',
104
+ 'plain' => 'plain text',
105
+ 'pptx' => 'PowerPoint slide show',
106
+ 'rst' => 'reStructuredText',
107
+ 'rtf' => 'Rich Text Format',
108
+ 'texinfo' => 'GNU Texinfo',
109
+ 'textile' => 'Textile',
110
+ 'slideous' => 'Slideous HTML and JavaScript slide show',
111
+ 'slidy' => 'Slidy HTML and JavaScript slide show',
112
+ 'dzslides' => 'DZSlides HTML5 + JavaScript slide show',
113
+ 'revealjs' => 'reveal.js HTML5 + JavaScript slide show',
114
+ 's5' => 'S5 HTML and JavaScript slide show',
115
+ 'tei' => 'TEI Simple',
116
+ 'xwiki' => 'XWiki markup',
117
+ 'zimwiki' => 'ZimWiki markup'
46
118
  }.freeze
47
119
 
48
120
  # The available binary writers and their corresponding names. The keys are
49
121
  # used to generate methods and specify options to Pandoc.
50
122
  BINARY_WRITERS = {
51
- 'odt' => 'OpenDocument',
123
+ 'odt' => 'OpenOffice text document',
52
124
  'docx' => 'Word docx',
53
- 'epub' => 'EPUB V2',
54
- 'epub3' => 'EPUB V3'
125
+ 'epub' => 'EPUB v3',
126
+ 'epub2' => 'EPUB v2',
127
+ 'epub3' => 'EPUB v3'
55
128
  }.freeze
56
129
 
57
130
  # All of the available Writers.
58
131
  WRITERS = STRING_WRITERS.merge(BINARY_WRITERS)
59
132
 
60
- # To use run the pandoc command with a custom executable path, the path
61
- # to the pandoc executable can be set here.
62
- def self.pandoc_path=(path)
63
- @@pandoc_path = path
64
- end
65
-
66
133
  # A shortcut method that creates a new PandocRuby object and immediately
67
134
  # calls `#convert`. Options passed to this method are passed directly to
68
135
  # `#new` and treated the same as if they were passed directly to the
@@ -71,26 +138,33 @@ class PandocRuby
71
138
  new(*args).convert
72
139
  end
73
140
 
141
+ attr_writer :binary_output
142
+
143
+ def binary_output
144
+ @binary_output ||= false
145
+ end
146
+
74
147
  attr_writer :options
148
+
75
149
  def options
76
- @options ||= []
150
+ @options ||= []
77
151
  end
78
152
 
79
153
  attr_writer :option_string
80
- def option_string
81
- @option_string ||= ''
82
- end
83
154
 
84
- attr_writer :binary_output
85
- def binary_output
86
- @binary_output ||= false
155
+ def option_string
156
+ @option_string ||= ''
87
157
  end
88
158
 
89
159
  attr_writer :writer
160
+
90
161
  def writer
91
- @writer ||= 'html'
162
+ @writer ||= 'html'
92
163
  end
93
164
 
165
+ attr_accessor :input_files
166
+ attr_accessor :input_string
167
+
94
168
  # Create a new PandocRuby converter object. The first argument contains the
95
169
  # input either as string or as an array of filenames.
96
170
  #
@@ -101,14 +175,13 @@ class PandocRuby
101
175
  # new(["/path/to/file.md"], :option1 => :value, :option2)
102
176
  # new(["/to/file1.html", "/to/file2.html"], :option1 => :value)
103
177
  def initialize(*args)
104
- @input_string = nil
105
- @input_files = nil
106
-
107
- if args[0].is_a?(String)
108
- @input_string = args.shift
109
- elsif args[0].is_a?(Array)
110
- @input_files = args.shift.join(' ')
178
+ case args[0]
179
+ when String
180
+ self.input_string = args.shift
181
+ when Array
182
+ self.input_files = args.shift.map { |f| "'#{f}'" }.join(' ')
111
183
  end
184
+
112
185
  self.options = args
113
186
  end
114
187
 
@@ -124,8 +197,9 @@ class PandocRuby
124
197
  # PandocRuby.new("# text").convert
125
198
  # # => "<h1 id=\"text\">text</h1>\n"
126
199
  def convert(*args)
127
- self.options += args if args
128
- self.option_string = prepare_options(self.options)
200
+ self.options += args if args
201
+ self.option_string = prepare_options(self.options)
202
+
129
203
  if self.binary_output
130
204
  convert_binary
131
205
  else
@@ -146,6 +220,7 @@ class PandocRuby
146
220
  READERS.each_key do |r|
147
221
  define_method(r) do |*args|
148
222
  args += [{ :from => r }]
223
+
149
224
  new(*args)
150
225
  end
151
226
  end
@@ -163,6 +238,7 @@ class PandocRuby
163
238
  WRITERS.each_key do |w|
164
239
  define_method(:"to_#{w}") do |*args|
165
240
  args += [{ :to => w.to_sym }]
241
+
166
242
  convert(*args)
167
243
  end
168
244
  end
@@ -174,13 +250,17 @@ class PandocRuby
174
250
  # temp file is closed and unlinked.
175
251
  def convert_binary
176
252
  tmp_file = Tempfile.new('pandoc-conversion')
253
+
177
254
  begin
178
- self.options += [{ :output => tmp_file.path }]
179
- self.option_string = "#{self.option_string} --output #{tmp_file.path}"
255
+ self.options += [{ :output => tmp_file.path }]
256
+ self.option_string = "#{self.option_string} --output \"#{tmp_file.path}\""
257
+
180
258
  execute_pandoc
259
+
181
260
  return IO.binread(tmp_file)
182
261
  ensure
183
262
  tmp_file.close
263
+
184
264
  tmp_file.unlink
185
265
  end
186
266
  end
@@ -192,36 +272,40 @@ class PandocRuby
192
272
 
193
273
  # Wrapper to run pandoc in a consistent, DRY way
194
274
  def execute_pandoc
195
- if ! @input_files.nil?
196
- execute("#{@@pandoc_path} #{@input_files}#{self.option_string}")
275
+ if !self.input_files.nil?
276
+ execute("#{PandocRuby.pandoc_path} #{self.input_files}#{self.option_string}")
197
277
  else
198
- execute("#{@@pandoc_path}#{self.option_string}")
278
+ execute("#{PandocRuby.pandoc_path}#{self.option_string}")
199
279
  end
200
280
  end
201
281
 
202
282
  # Run the command and returns the output.
203
283
  def execute(command)
204
284
  output = error = exit_status = nil
205
- @timeout ||= 31_557_600 # A year should be enough?
285
+
286
+ @timeout ||= 31_557_600
287
+
206
288
  Open3.popen3(command) do |stdin, stdout, stderr, wait_thr|
207
289
  begin
208
290
  Timeout.timeout(@timeout) do
209
- unless @input_string.nil?
210
- stdin.puts @input_string
211
- stdin.close
212
- end
213
- output = stdout.read
214
- error = stderr.read
291
+ stdin.puts self.input_string
292
+
293
+ stdin.close
294
+
295
+ output = stdout.read
296
+ error = stderr.read
215
297
  exit_status = wait_thr.value
216
298
  end
217
299
  rescue Timeout::Error => ex
218
300
  Process.kill 9, wait_thr.pid
219
- maybe_ex = "\n#{ex}" if ex
220
- error = "Pandoc timed out after #{@timeout} seconds.#{maybe_ex}"
301
+
302
+ maybe_ex = "\n#{ex}" if ex
303
+ error = "Pandoc timed out after #{@timeout} seconds.#{maybe_ex}"
221
304
  end
222
305
  end
223
306
 
224
307
  raise error unless exit_status && exit_status.success?
308
+
225
309
  output
226
310
  end
227
311
 
@@ -229,14 +313,13 @@ class PandocRuby
229
313
  # opts passed in. Recursively calls itself in order to handle hash options.
230
314
  def prepare_options(opts = [])
231
315
  opts.inject('') do |string, (option, value)|
232
- string += case
233
- when value
234
- create_option(option, value)
235
- when option.respond_to?(:each_pair)
236
- prepare_options(option)
237
- else
238
- create_option(option)
239
- end
316
+ string + if value
317
+ create_option(option, value)
318
+ elsif option.respond_to?(:each_pair)
319
+ prepare_options(option)
320
+ else
321
+ create_option(option)
322
+ end
240
323
  end
241
324
  end
242
325
 
@@ -245,13 +328,15 @@ class PandocRuby
245
328
  # command line options. If the option has an argument, it is also included.
246
329
  def create_option(flag, argument = nil)
247
330
  return '' unless flag
331
+
248
332
  flag = flag.to_s
249
333
  set_pandoc_ruby_options(flag, argument)
250
334
  return '' if flag == 'timeout' # pandoc doesn't accept timeouts yet
251
- if !argument.nil?
252
- "#{format_flag(flag)} #{argument}"
253
- else
335
+
336
+ if argument.nil?
254
337
  format_flag(flag)
338
+ else
339
+ "#{format_flag(flag)} \"#{argument}\""
255
340
  end
256
341
  end
257
342
 
@@ -260,6 +345,8 @@ class PandocRuby
260
345
  def format_flag(flag)
261
346
  if flag.length == 1
262
347
  " -#{flag}"
348
+ elsif flag =~ /^-|\+/
349
+ " #{flag}"
263
350
  else
264
351
  " --#{flag.to_s.tr('_', '-')}"
265
352
  end
@@ -270,11 +357,10 @@ class PandocRuby
270
357
  def set_pandoc_ruby_options(flag, argument = nil)
271
358
  case flag
272
359
  when 't', 'to'
273
- self.writer = argument.to_s
274
- self.binary_output = true if BINARY_WRITERS.keys.include?(self.writer)
360
+ self.writer = argument.to_s
361
+ self.binary_output = true if BINARY_WRITERS.key?(self.writer)
275
362
  when 'timeout'
276
363
  @timeout = argument
277
364
  end
278
365
  end
279
-
280
366
  end
data/pandoc-ruby.gemspec CHANGED
@@ -5,18 +5,16 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = 'pandoc-ruby'
8
- s.version = '2.0.1'
9
-
10
- s.required_rubygems_version = Gem::Requirement.new('>= 0') if s.respond_to? :required_rubygems_version=
8
+ s.version = '2.1.10'
11
9
  s.authors = ['William Melody']
12
- s.date = '2016-05-04'
10
+ s.date = '2023-11-24'
13
11
  s.description = 'Ruby wrapper for Pandoc'
14
12
  s.email = 'hi@williammelody.com'
15
13
  s.extra_rdoc_files = [
16
14
  'LICENSE',
17
15
  'README.md'
18
16
  ]
19
- s.files = %w(
17
+ s.files = %w[
20
18
  .document
21
19
  Gemfile
22
20
  Gemfile.lock
@@ -27,36 +25,15 @@ Gem::Specification.new do |s|
27
25
  pandoc-ruby.gemspec
28
26
  test/benchmark.rb
29
27
  test/files/benchmark.txt
30
- test/files/test.md
28
+ test/files/Test\ File\ 1.md
29
+ test/files/Test\ File\ 2.md
31
30
  test/helper.rb
32
31
  test/test_conversions.rb
33
32
  test/test_pandoc_ruby.rb
34
- )
35
- s.homepage = 'http://github.com/alphabetum/pandoc-ruby'
33
+ ]
34
+ s.homepage = 'http://github.com/xwmx/pandoc-ruby'
36
35
  s.licenses = ['MIT']
37
36
  s.require_paths = ['lib']
38
- s.required_ruby_version = '>= 1.9.3'
39
- s.rubygems_version = '1.8.25'
37
+ s.required_ruby_version = '>= 2.2'
40
38
  s.summary = 'PandocRuby'
41
-
42
- if s.respond_to? :specification_version then
43
- s.specification_version = 3
44
-
45
- if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
46
- s.add_development_dependency('mocha', '~> 1.1', '>= 1.1.0')
47
- s.add_development_dependency('rake', '~> 10.4', '>= 10.4.2')
48
- s.add_development_dependency('rdoc', '~> 4.2', '>= 4.2.0')
49
- s.add_development_dependency('minitest', '~>5.8.3', '>= 5.8.3')
50
- else
51
- s.add_dependency('mocha', ['~> 1.1.0'])
52
- s.add_dependency('rake', ['~> 10.4.2'])
53
- s.add_dependency('rdoc', ['~> 4.2.0'])
54
- s.add_dependency('minitest', ['~>5.8.3'])
55
- end
56
- else
57
- s.add_dependency('mocha', ['~> 1.1.0'])
58
- s.add_dependency('rake', ['~> 10.4.2'])
59
- s.add_dependency('rdoc', ['~> 4.2.0'])
60
- s.add_dependency('minitest', ['~>5.8.3'])
61
- end
62
39
  end
data/test/benchmark.rb CHANGED
@@ -6,10 +6,10 @@
6
6
  iterations = 100
7
7
  test_file = File.join(File.dirname(__FILE__), 'files', 'benchmark.txt')
8
8
  impl_gems = {
9
- 'BlueCloth' => 'bluecloth',
10
- 'RDiscount' => 'rdiscount',
11
- 'Maruku' => 'maruku',
12
- 'PandocRuby' => 'pandoc-ruby'
9
+ 'BlueCloth' => 'bluecloth',
10
+ 'RDiscount' => 'rdiscount',
11
+ 'Maruku' => 'maruku',
12
+ 'PandocRuby' => 'pandoc-ruby'
13
13
  }
14
14
 
15
15
  implementations = impl_gems.keys
@@ -0,0 +1 @@
1
+ # A Second Title
data/test/helper.rb CHANGED
@@ -3,13 +3,13 @@ require 'bundler'
3
3
  begin
4
4
  Bundler.setup(:default, :development)
5
5
  rescue Bundler::BundlerError => e
6
- $stderr.puts e.message
7
- $stderr.puts 'Run `bundle install` to install missing gems'
6
+ warn e.message
7
+ warn 'Run `bundle install` to install missing gems'
8
8
  exit e.status_code
9
9
  end
10
10
  require 'minitest/autorun'
11
11
  require 'minitest/pride'
12
- require 'mocha/setup'
12
+ require 'mocha/minitest'
13
13
 
14
14
  $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
15
15
  $LOAD_PATH.unshift(File.dirname(__FILE__))
@@ -23,8 +23,59 @@ describe 'Conversions' do
23
23
  :from => from,
24
24
  :to => to
25
25
  )
26
- assert_equal(converted_content.strip, to_content.strip)
26
+
27
+ assert_equal(
28
+ to_content.strip,
29
+ converted_content.strip,
30
+ <<-HEREDOC
31
+ ---------
32
+ EXPECTED:
33
+ ---------
34
+ #{to_content.strip}
35
+ ---------
36
+ -------
37
+ ACTUAL:
38
+ -------
39
+ #{converted_content.strip}
40
+ -------
41
+ HEREDOC
42
+ )
27
43
  end
28
44
  end
29
45
  end
46
+
47
+ describe '.docx' do
48
+ it "converts from docx to html" do
49
+ converted_content = PandocRuby.convert(
50
+ ['./test/files/reference.docx'],
51
+ :from => 'docx',
52
+ :to => 'html'
53
+ )
54
+ assert_equal("<p>Hello World.</p>", converted_content.strip)
55
+ end
56
+
57
+ it "raises an error when attempting to convert doc with docx format" do
58
+ error = assert_raises(RuntimeError) do
59
+ PandocRuby.convert(
60
+ ['./test/files/reference.doc'],
61
+ :from => 'docx',
62
+ :to => 'html'
63
+ )
64
+ end
65
+
66
+ assert_match(/couldn't unpack docx container/, error.message)
67
+ end
68
+
69
+ it "raises an error when attempting to convert doc with doc format" do
70
+ error = assert_raises(RuntimeError) do
71
+ PandocRuby.convert(
72
+ ['./test/files/reference.doc'],
73
+ :from => 'doc',
74
+ :to => 'html'
75
+ )
76
+ end
77
+
78
+ assert_match(/Pandoc can convert from DOCX, but not from DOC./, error.message)
79
+ end
80
+ end
30
81
  end