pandoc-ruby 2.0.1 → 2.1.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/pandoc-ruby.rb CHANGED
@@ -3,66 +3,133 @@ require 'tempfile'
3
3
  require 'timeout'
4
4
 
5
5
  class PandocRuby
6
-
7
- @@pandoc_path = 'pandoc'
6
+ # Use the pandoc command with a custom executable path.
7
+ @pandoc_path = 'pandoc'
8
+ class << self
9
+ attr_accessor :pandoc_path
10
+ end
8
11
 
9
12
  # The available readers and their corresponding names. The keys are used to
10
13
  # generate methods and specify options to Pandoc.
11
14
  READERS = {
12
- 'native' => 'pandoc native',
13
- 'json' => 'pandoc JSON',
14
- 'markdown' => 'markdown',
15
- 'rst' => 'reStructuredText',
16
- 'textile' => 'textile',
17
- 'html' => 'HTML',
18
- 'latex' => 'LaTeX'
15
+ 'biblatex' => 'BibLaTeX bibliography',
16
+ 'bibtex' => 'BibTeX bibliography',
17
+ 'commonmark' => 'CommonMark Markdown',
18
+ 'commonmark_x' => 'CommonMark Markdown with extensions',
19
+ 'creole' => 'Creole 1.0',
20
+ 'csljson' => 'CSL JSON bibliography',
21
+ 'csv' => 'CSV table',
22
+ 'docbook' => 'DocBook',
23
+ 'docx' => 'Word docx',
24
+ 'dokuwiki' => 'DokuWiki markup',
25
+ 'endnotexml' => 'EndNote XML bibliography',
26
+ 'epub' => 'EPUB',
27
+ 'fb2' => 'FictionBook2 e-book',
28
+ 'gfm' => 'GitHub-Flavored Markdown',
29
+ 'haddock' => 'Haddock markup',
30
+ 'html' => 'HTML',
31
+ 'ipynb' => 'Jupyter notebook',
32
+ 'jats' => 'JATS XML',
33
+ 'jira' => 'Jira wiki markup',
34
+ 'json' => 'JSON version of native AST',
35
+ 'latex' => 'LaTex',
36
+ 'man' => 'roff man',
37
+ 'markdown' => "Pandoc's Markdown",
38
+ 'markdown_mmd' => 'MultiMarkdown',
39
+ 'markdown_phpextra' => 'PHP Markdown Extra',
40
+ 'markdown_strict' => 'original unextended Markdown',
41
+ 'mediawiki' => 'MediaWiki markup',
42
+ 'muse' => 'Muse',
43
+ 'native' => 'native Haskell',
44
+ 'odt' => 'ODT',
45
+ 'opml' => 'OPML',
46
+ 'org' => 'Emacs Org mode',
47
+ 'ris' => 'RIS bibliography',
48
+ 'rst' => 'reStructuredText',
49
+ 'rtf' => 'Rich Text Format',
50
+ 't2t' => 'txt2tags',
51
+ 'textile' => 'Textile',
52
+ 'tikiwiki' => 'TikiWiki markup',
53
+ 'tsv' => 'TSV table',
54
+ 'twiki' => 'TWiki markup',
55
+ 'vimwiki' => 'Vimwiki'
19
56
  }.freeze
20
57
 
21
58
  # The available string writers and their corresponding names. The keys are
22
59
  # used to generate methods and specify options to Pandoc.
23
60
  STRING_WRITERS = {
24
- 'native' => 'pandoc native',
25
- 'json' => 'pandoc JSON',
26
- 'html' => 'HTML',
27
- 'html5' => 'HTML5',
28
- 's5' => 'S5 HTML slideshow',
29
- 'slidy' => 'Slidy HTML slideshow',
30
- 'dzslides' => 'Dzslides HTML slideshow',
31
- 'docbook' => 'DocBook XML',
32
- 'opendocument' => 'OpenDocument XML',
33
- 'latex' => 'LaTeX',
34
- 'beamer' => 'Beamer PDF slideshow',
35
- 'context' => 'ConTeXt',
36
- 'texinfo' => 'GNU Texinfo',
37
- 'man' => 'groff man',
38
- 'markdown' => 'markdown',
39
- 'plain' => 'plain',
40
- 'rst' => 'reStructuredText',
41
- 'mediawiki' => 'MediaWiki markup',
42
- 'textile' => 'textile',
43
- 'rtf' => 'rich text format',
44
- 'org' => 'emacs org mode',
45
- 'asciidoc' => 'asciidoc'
61
+ 'asciidoc' => 'AsciiDoc',
62
+ 'asciidoctor' => 'AsciiDoctor',
63
+ 'beamer' => 'LaTeX beamer slide show',
64
+ 'biblatex' => 'BibLaTeX bibliography',
65
+ 'bibtex' => 'BibTeX bibliography',
66
+ 'chunkedhtml' => 'zip archive of multiple linked HTML files',
67
+ 'commonmark' => 'CommonMark Markdown',
68
+ 'commonmark_x' => 'CommonMark Markdown with extensions',
69
+ 'context' => 'ConTeXt',
70
+ 'csljson' => 'CSL JSON bibliography',
71
+ 'docbook' => 'DocBook 4',
72
+ 'docbook4' => 'DocBook 4',
73
+ 'docbook5' => 'DocBook 5',
74
+ 'dokuwiki' => 'DokuWiki markup',
75
+ 'fb2' => 'FictionBook2 e-book',
76
+ 'gfm' => 'GitHub-Flavored Markdown',
77
+ 'haddock' => 'Haddock markup',
78
+ 'html' => 'HTML, i.e. HTML5/XHTML polyglot markup',
79
+ 'html5' => 'HTML, i.e. HTML5/XHTML polyglot markup',
80
+ 'html4' => 'XHTML 1.0 Transitional',
81
+ 'icml' => 'InDesign ICML',
82
+ 'ipynb' => 'Jupyter notebook',
83
+ 'jats_archiving' => 'JATS XML, Archiving and Interchange Tag Set',
84
+ 'jats_articleauthoring' => 'JATS XML, Article Authoring Tag Set',
85
+ 'jats_publishing' => 'JATS XML, Journal Publishing Tag Set',
86
+ 'jats' => 'alias for jats_archiving',
87
+ 'jira' => 'Jira wiki markup',
88
+ 'json' => 'JSON version of native AST',
89
+ 'latex' => 'LaTex',
90
+ 'man' => 'roff man',
91
+ 'markdown' => "Pandoc's Markdown",
92
+ 'markdown_mmd' => 'MultiMarkdown',
93
+ 'markdown_phpextra' => 'PHP Markdown Extra',
94
+ 'markdown_strict' => 'original unextended Markdown',
95
+ 'markua' => 'Markua',
96
+ 'mediawiki' => 'MediaWiki markup',
97
+ 'ms' => 'roff ms',
98
+ 'muse' => 'Muse',
99
+ 'native' => 'native Haskell',
100
+ 'opml' => 'OPML',
101
+ 'opendocument' => 'OpenDocument',
102
+ 'org' => 'Emacs Org mode',
103
+ 'pdf' => 'PDF',
104
+ 'plain' => 'plain text',
105
+ 'pptx' => 'PowerPoint slide show',
106
+ 'rst' => 'reStructuredText',
107
+ 'rtf' => 'Rich Text Format',
108
+ 'texinfo' => 'GNU Texinfo',
109
+ 'textile' => 'Textile',
110
+ 'slideous' => 'Slideous HTML and JavaScript slide show',
111
+ 'slidy' => 'Slidy HTML and JavaScript slide show',
112
+ 'dzslides' => 'DZSlides HTML5 + JavaScript slide show',
113
+ 'revealjs' => 'reveal.js HTML5 + JavaScript slide show',
114
+ 's5' => 'S5 HTML and JavaScript slide show',
115
+ 'tei' => 'TEI Simple',
116
+ 'xwiki' => 'XWiki markup',
117
+ 'zimwiki' => 'ZimWiki markup'
46
118
  }.freeze
47
119
 
48
120
  # The available binary writers and their corresponding names. The keys are
49
121
  # used to generate methods and specify options to Pandoc.
50
122
  BINARY_WRITERS = {
51
- 'odt' => 'OpenDocument',
123
+ 'odt' => 'OpenOffice text document',
52
124
  'docx' => 'Word docx',
53
- 'epub' => 'EPUB V2',
54
- 'epub3' => 'EPUB V3'
125
+ 'epub' => 'EPUB v3',
126
+ 'epub2' => 'EPUB v2',
127
+ 'epub3' => 'EPUB v3'
55
128
  }.freeze
56
129
 
57
130
  # All of the available Writers.
58
131
  WRITERS = STRING_WRITERS.merge(BINARY_WRITERS)
59
132
 
60
- # To use run the pandoc command with a custom executable path, the path
61
- # to the pandoc executable can be set here.
62
- def self.pandoc_path=(path)
63
- @@pandoc_path = path
64
- end
65
-
66
133
  # A shortcut method that creates a new PandocRuby object and immediately
67
134
  # calls `#convert`. Options passed to this method are passed directly to
68
135
  # `#new` and treated the same as if they were passed directly to the
@@ -71,26 +138,33 @@ class PandocRuby
71
138
  new(*args).convert
72
139
  end
73
140
 
141
+ attr_writer :binary_output
142
+
143
+ def binary_output
144
+ @binary_output ||= false
145
+ end
146
+
74
147
  attr_writer :options
148
+
75
149
  def options
76
- @options ||= []
150
+ @options ||= []
77
151
  end
78
152
 
79
153
  attr_writer :option_string
80
- def option_string
81
- @option_string ||= ''
82
- end
83
154
 
84
- attr_writer :binary_output
85
- def binary_output
86
- @binary_output ||= false
155
+ def option_string
156
+ @option_string ||= ''
87
157
  end
88
158
 
89
159
  attr_writer :writer
160
+
90
161
  def writer
91
- @writer ||= 'html'
162
+ @writer ||= 'html'
92
163
  end
93
164
 
165
+ attr_accessor :input_files
166
+ attr_accessor :input_string
167
+
94
168
  # Create a new PandocRuby converter object. The first argument contains the
95
169
  # input either as string or as an array of filenames.
96
170
  #
@@ -101,14 +175,13 @@ class PandocRuby
101
175
  # new(["/path/to/file.md"], :option1 => :value, :option2)
102
176
  # new(["/to/file1.html", "/to/file2.html"], :option1 => :value)
103
177
  def initialize(*args)
104
- @input_string = nil
105
- @input_files = nil
106
-
107
- if args[0].is_a?(String)
108
- @input_string = args.shift
109
- elsif args[0].is_a?(Array)
110
- @input_files = args.shift.join(' ')
178
+ case args[0]
179
+ when String
180
+ self.input_string = args.shift
181
+ when Array
182
+ self.input_files = args.shift.map { |f| "'#{f}'" }.join(' ')
111
183
  end
184
+
112
185
  self.options = args
113
186
  end
114
187
 
@@ -124,8 +197,9 @@ class PandocRuby
124
197
  # PandocRuby.new("# text").convert
125
198
  # # => "<h1 id=\"text\">text</h1>\n"
126
199
  def convert(*args)
127
- self.options += args if args
128
- self.option_string = prepare_options(self.options)
200
+ self.options += args if args
201
+ self.option_string = prepare_options(self.options)
202
+
129
203
  if self.binary_output
130
204
  convert_binary
131
205
  else
@@ -146,6 +220,7 @@ class PandocRuby
146
220
  READERS.each_key do |r|
147
221
  define_method(r) do |*args|
148
222
  args += [{ :from => r }]
223
+
149
224
  new(*args)
150
225
  end
151
226
  end
@@ -163,6 +238,7 @@ class PandocRuby
163
238
  WRITERS.each_key do |w|
164
239
  define_method(:"to_#{w}") do |*args|
165
240
  args += [{ :to => w.to_sym }]
241
+
166
242
  convert(*args)
167
243
  end
168
244
  end
@@ -174,13 +250,17 @@ class PandocRuby
174
250
  # temp file is closed and unlinked.
175
251
  def convert_binary
176
252
  tmp_file = Tempfile.new('pandoc-conversion')
253
+
177
254
  begin
178
- self.options += [{ :output => tmp_file.path }]
179
- self.option_string = "#{self.option_string} --output #{tmp_file.path}"
255
+ self.options += [{ :output => tmp_file.path }]
256
+ self.option_string = "#{self.option_string} --output \"#{tmp_file.path}\""
257
+
180
258
  execute_pandoc
259
+
181
260
  return IO.binread(tmp_file)
182
261
  ensure
183
262
  tmp_file.close
263
+
184
264
  tmp_file.unlink
185
265
  end
186
266
  end
@@ -192,36 +272,40 @@ class PandocRuby
192
272
 
193
273
  # Wrapper to run pandoc in a consistent, DRY way
194
274
  def execute_pandoc
195
- if ! @input_files.nil?
196
- execute("#{@@pandoc_path} #{@input_files}#{self.option_string}")
275
+ if !self.input_files.nil?
276
+ execute("#{PandocRuby.pandoc_path} #{self.input_files}#{self.option_string}")
197
277
  else
198
- execute("#{@@pandoc_path}#{self.option_string}")
278
+ execute("#{PandocRuby.pandoc_path}#{self.option_string}")
199
279
  end
200
280
  end
201
281
 
202
282
  # Runs the command and returns the output.
203
283
  def execute(command)
204
284
  output = error = exit_status = nil
205
- @timeout ||= 31_557_600 # A year should be enough?
285
+
286
+ @timeout ||= 31_557_600
287
+
206
288
  Open3.popen3(command) do |stdin, stdout, stderr, wait_thr|
207
289
  begin
208
290
  Timeout.timeout(@timeout) do
209
- unless @input_string.nil?
210
- stdin.puts @input_string
211
- stdin.close
212
- end
213
- output = stdout.read
214
- error = stderr.read
291
+ stdin.puts self.input_string
292
+
293
+ stdin.close
294
+
295
+ output = stdout.read
296
+ error = stderr.read
215
297
  exit_status = wait_thr.value
216
298
  end
217
299
  rescue Timeout::Error => ex
218
300
  Process.kill 9, wait_thr.pid
219
- maybe_ex = "\n#{ex}" if ex
220
- error = "Pandoc timed out after #{@timeout} seconds.#{maybe_ex}"
301
+
302
+ maybe_ex = "\n#{ex}" if ex
303
+ error = "Pandoc timed out after #{@timeout} seconds.#{maybe_ex}"
221
304
  end
222
305
  end
223
306
 
224
307
  raise error unless exit_status && exit_status.success?
308
+
225
309
  output
226
310
  end
227
311
 
@@ -229,14 +313,13 @@ class PandocRuby
229
313
  # opts passed in. Recursively calls itself in order to handle hash options.
230
314
  def prepare_options(opts = [])
231
315
  opts.inject('') do |string, (option, value)|
232
- string += case
233
- when value
234
- create_option(option, value)
235
- when option.respond_to?(:each_pair)
236
- prepare_options(option)
237
- else
238
- create_option(option)
239
- end
316
+ string + if value
317
+ create_option(option, value)
318
+ elsif option.respond_to?(:each_pair)
319
+ prepare_options(option)
320
+ else
321
+ create_option(option)
322
+ end
240
323
  end
241
324
  end
242
325
 
@@ -245,13 +328,15 @@ class PandocRuby
245
328
  # command line options. If the option has an argument, it is also included.
246
329
  def create_option(flag, argument = nil)
247
330
  return '' unless flag
331
+
248
332
  flag = flag.to_s
249
333
  set_pandoc_ruby_options(flag, argument)
250
334
  return '' if flag == 'timeout' # pandoc doesn't accept timeouts yet
251
- if !argument.nil?
252
- "#{format_flag(flag)} #{argument}"
253
- else
335
+
336
+ if argument.nil?
254
337
  format_flag(flag)
338
+ else
339
+ "#{format_flag(flag)} \"#{argument}\""
255
340
  end
256
341
  end
257
342
 
@@ -260,6 +345,8 @@ class PandocRuby
260
345
  def format_flag(flag)
261
346
  if flag.length == 1
262
347
  " -#{flag}"
348
+ elsif flag =~ /^-|\+/
349
+ " #{flag}"
263
350
  else
264
351
  " --#{flag.to_s.tr('_', '-')}"
265
352
  end
@@ -270,11 +357,10 @@ class PandocRuby
270
357
  def set_pandoc_ruby_options(flag, argument = nil)
271
358
  case flag
272
359
  when 't', 'to'
273
- self.writer = argument.to_s
274
- self.binary_output = true if BINARY_WRITERS.keys.include?(self.writer)
360
+ self.writer = argument.to_s
361
+ self.binary_output = true if BINARY_WRITERS.key?(self.writer)
275
362
  when 'timeout'
276
363
  @timeout = argument
277
364
  end
278
365
  end
279
-
280
366
  end
data/pandoc-ruby.gemspec CHANGED
@@ -5,18 +5,16 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = 'pandoc-ruby'
8
- s.version = '2.0.1'
9
-
10
- s.required_rubygems_version = Gem::Requirement.new('>= 0') if s.respond_to? :required_rubygems_version=
8
+ s.version = '2.1.10'
11
9
  s.authors = ['William Melody']
12
- s.date = '2016-05-04'
10
+ s.date = '2023-11-24'
13
11
  s.description = 'Ruby wrapper for Pandoc'
14
12
  s.email = 'hi@williammelody.com'
15
13
  s.extra_rdoc_files = [
16
14
  'LICENSE',
17
15
  'README.md'
18
16
  ]
19
- s.files = %w(
17
+ s.files = %w[
20
18
  .document
21
19
  Gemfile
22
20
  Gemfile.lock
@@ -27,36 +25,15 @@ Gem::Specification.new do |s|
27
25
  pandoc-ruby.gemspec
28
26
  test/benchmark.rb
29
27
  test/files/benchmark.txt
30
- test/files/test.md
28
+ test/files/Test\ File\ 1.md
29
+ test/files/Test\ File\ 2.md
31
30
  test/helper.rb
32
31
  test/test_conversions.rb
33
32
  test/test_pandoc_ruby.rb
34
- )
35
- s.homepage = 'http://github.com/alphabetum/pandoc-ruby'
33
+ ]
34
+ s.homepage = 'http://github.com/xwmx/pandoc-ruby'
36
35
  s.licenses = ['MIT']
37
36
  s.require_paths = ['lib']
38
- s.required_ruby_version = '>= 1.9.3'
39
- s.rubygems_version = '1.8.25'
37
+ s.required_ruby_version = '>= 2.2'
40
38
  s.summary = 'PandocRuby'
41
-
42
- if s.respond_to? :specification_version then
43
- s.specification_version = 3
44
-
45
- if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
46
- s.add_development_dependency('mocha', '~> 1.1', '>= 1.1.0')
47
- s.add_development_dependency('rake', '~> 10.4', '>= 10.4.2')
48
- s.add_development_dependency('rdoc', '~> 4.2', '>= 4.2.0')
49
- s.add_development_dependency('minitest', '~>5.8.3', '>= 5.8.3')
50
- else
51
- s.add_dependency('mocha', ['~> 1.1.0'])
52
- s.add_dependency('rake', ['~> 10.4.2'])
53
- s.add_dependency('rdoc', ['~> 4.2.0'])
54
- s.add_dependency('minitest', ['~>5.8.3'])
55
- end
56
- else
57
- s.add_dependency('mocha', ['~> 1.1.0'])
58
- s.add_dependency('rake', ['~> 10.4.2'])
59
- s.add_dependency('rdoc', ['~> 4.2.0'])
60
- s.add_dependency('minitest', ['~>5.8.3'])
61
- end
62
39
  end
data/test/benchmark.rb CHANGED
@@ -6,10 +6,10 @@
6
6
  iterations = 100
7
7
  test_file = File.join(File.dirname(__FILE__), 'files', 'benchmark.txt')
8
8
  impl_gems = {
9
- 'BlueCloth' => 'bluecloth',
10
- 'RDiscount' => 'rdiscount',
11
- 'Maruku' => 'maruku',
12
- 'PandocRuby' => 'pandoc-ruby'
9
+ 'BlueCloth' => 'bluecloth',
10
+ 'RDiscount' => 'rdiscount',
11
+ 'Maruku' => 'maruku',
12
+ 'PandocRuby' => 'pandoc-ruby'
13
13
  }
14
14
 
15
15
  implementations = impl_gems.keys
@@ -0,0 +1 @@
1
+ # A Second Title
data/test/helper.rb CHANGED
@@ -3,13 +3,13 @@ require 'bundler'
3
3
  begin
4
4
  Bundler.setup(:default, :development)
5
5
  rescue Bundler::BundlerError => e
6
- $stderr.puts e.message
7
- $stderr.puts 'Run `bundle install` to install missing gems'
6
+ warn e.message
7
+ warn 'Run `bundle install` to install missing gems'
8
8
  exit e.status_code
9
9
  end
10
10
  require 'minitest/autorun'
11
11
  require 'minitest/pride'
12
- require 'mocha/setup'
12
+ require 'mocha/minitest'
13
13
 
14
14
  $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
15
15
  $LOAD_PATH.unshift(File.dirname(__FILE__))
@@ -23,8 +23,59 @@ describe 'Conversions' do
23
23
  :from => from,
24
24
  :to => to
25
25
  )
26
- assert_equal(converted_content.strip, to_content.strip)
26
+
27
+ assert_equal(
28
+ to_content.strip,
29
+ converted_content.strip,
30
+ <<-HEREDOC
31
+ ---------
32
+ EXPECTED:
33
+ ---------
34
+ #{to_content.strip}
35
+ ---------
36
+ -------
37
+ ACTUAL:
38
+ -------
39
+ #{converted_content.strip}
40
+ -------
41
+ HEREDOC
42
+ )
27
43
  end
28
44
  end
29
45
  end
46
+
47
+ describe '.docx' do
48
+ it "converts from docx to html" do
49
+ converted_content = PandocRuby.convert(
50
+ ['./test/files/reference.docx'],
51
+ :from => 'docx',
52
+ :to => 'html'
53
+ )
54
+ assert_equal("<p>Hello World.</p>", converted_content.strip)
55
+ end
56
+
57
+ it "raises an error when attempting to convert doc with docx format" do
58
+ error = assert_raises(RuntimeError) do
59
+ PandocRuby.convert(
60
+ ['./test/files/reference.doc'],
61
+ :from => 'docx',
62
+ :to => 'html'
63
+ )
64
+ end
65
+
66
+ assert_match(/couldn't unpack docx container/, error.message)
67
+ end
68
+
69
+ it "raises an error when attempting to convert doc with doc format" do
70
+ error = assert_raises(RuntimeError) do
71
+ PandocRuby.convert(
72
+ ['./test/files/reference.doc'],
73
+ :from => 'doc',
74
+ :to => 'html'
75
+ )
76
+ end
77
+
78
+ assert_match(/Pandoc can convert from DOCX, but not from DOC./, error.message)
79
+ end
80
+ end
30
81
  end