pandoc_rb 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +45 -0
- data/.ruby-gemset +1 -0
- data/.ruby-version +1 -0
- data/.travis.yml +24 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +48 -0
- data/LICENSE +30 -0
- data/LICENSE.txt +31 -0
- data/README.md +83 -0
- data/Rakefile +31 -0
- data/Setup.hs +2 -0
- data/bench/bench.rb +69 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/ext/pandoc_rb/extconf.rb +130 -0
- data/ext/pandoc_rb/pandoc-rb.cabal +74 -0
- data/ext/pandoc_rb/pandoc_rb.c +100 -0
- data/ext/pandoc_rb/src/PandocRb.hs +26 -0
- data/ext/pandoc_rb/src/Text/Pandoc/C.hs +147 -0
- data/ext/pandoc_rb/src/Text/Pandoc/C/Types.hs +73 -0
- data/ext/pandoc_rb/src/Text/Pandoc/C/Utils.hs +78 -0
- data/ext/pandoc_rb/stack.yaml +70 -0
- data/ext/pandoc_rb/test/DetailedSpec.hs +173 -0
- data/lib/pandoc_rb.rb +96 -0
- data/lib/pandoc_rb/error.rb +6 -0
- data/lib/pandoc_rb/parse_failure.rb +11 -0
- data/lib/pandoc_rb/parsec_error.rb +13 -0
- data/lib/pandoc_rb/readers.rb +27 -0
- data/lib/pandoc_rb/version.rb +5 -0
- data/lib/pandoc_rb/writers.rb +46 -0
- data/pandoc_rb.gemspec +37 -0
- data/src/Text/Pandoc/C.hs +217 -0
- data/src/Text/Pandoc/C_stub.h +9 -0
- metadata +206 -0
@@ -0,0 +1,74 @@
|
|
1
|
+
name: pandoc-rb
|
2
|
+
version: 0.1.0.0
|
3
|
+
synopsis: Call pandoc from ruby using the FFI
|
4
|
+
description: This package provides C exports made for use by Ruby for calling Pandoc
|
5
|
+
homepage: https://github.com/michaeljklein/pandoc_rb
|
6
|
+
license: BSD3
|
7
|
+
author: Michael Klein
|
8
|
+
maintainer: lambdamichael@gmail.com
|
9
|
+
category: FFI
|
10
|
+
build-type: Simple
|
11
|
+
cabal-version: >=1.10
|
12
|
+
|
13
|
+
executable PandocRb.dylib
|
14
|
+
main-is: PandocRb.hs
|
15
|
+
other-extensions: ForeignFunctionInterface
|
16
|
+
build-depends: base >= 4.7 && < 5
|
17
|
+
, pandoc-rb
|
18
|
+
, bytestring
|
19
|
+
, either
|
20
|
+
, pandoc
|
21
|
+
, pandoc-types
|
22
|
+
, storable-tuple
|
23
|
+
, transformers
|
24
|
+
, aeson
|
25
|
+
, parsec
|
26
|
+
hs-source-dirs: src
|
27
|
+
other-modules: Text.Pandoc.C
|
28
|
+
, Text.Pandoc.C.Types
|
29
|
+
, Text.Pandoc.C.Utils
|
30
|
+
default-language: Haskell2010
|
31
|
+
include-dirs: src
|
32
|
+
ghc-options: -O3 -shared -fPIC -dynamic -Wall
|
33
|
+
extra-libraries: HSrts-ghc8.0.2
|
34
|
+
|
35
|
+
|
36
|
+
library
|
37
|
+
hs-source-dirs: src
|
38
|
+
exposed-modules: Text.Pandoc.C
|
39
|
+
, Text.Pandoc.C.Types
|
40
|
+
, Text.Pandoc.C.Utils
|
41
|
+
build-depends: base >= 4.7 && < 5
|
42
|
+
, bytestring
|
43
|
+
, either
|
44
|
+
, pandoc == 1.19.2.1337
|
45
|
+
, pandoc-types
|
46
|
+
, storable-tuple
|
47
|
+
, transformers
|
48
|
+
, aeson
|
49
|
+
, parsec
|
50
|
+
ghc-options: -O3 -Wall
|
51
|
+
default-language: Haskell2010
|
52
|
+
|
53
|
+
test-suite pandoc-rb-test
|
54
|
+
type: detailed-0.9
|
55
|
+
hs-source-dirs: test
|
56
|
+
test-module: DetailedSpec
|
57
|
+
build-depends: base >= 4.7 && < 5
|
58
|
+
, Cabal >= 1.20.0
|
59
|
+
, QuickCheck
|
60
|
+
, bytestring
|
61
|
+
, either
|
62
|
+
, pandoc == 1.19.2.1337
|
63
|
+
, pandoc-types
|
64
|
+
, pandoc-rb
|
65
|
+
, storable-tuple
|
66
|
+
, transformers
|
67
|
+
ghc-options: -threaded -rtsopts -with-rtsopts=-N
|
68
|
+
default-language: Haskell2010
|
69
|
+
|
70
|
+
source-repository head
|
71
|
+
type: git
|
72
|
+
location: https://github.com/michaeljklein/pandoc_rb
|
73
|
+
|
74
|
+
|
@@ -0,0 +1,100 @@
|
|
1
|
+
#include <stdio.h>
|
2
|
+
#include "ruby.h"
|
3
|
+
#include "HsFFI.h"
|
4
|
+
#include "PandocRb_stub.h"
|
5
|
+
|
6
|
+
VALUE PandocRb = Qnil;
|
7
|
+
|
8
|
+
void Init_pandoc_rb();
|
9
|
+
|
10
|
+
// a global designating whether the haskell runtime has been initialized
|
11
|
+
int hs_inited = 0;
|
12
|
+
|
13
|
+
VALUE method_convert_init(VALUE self);
|
14
|
+
VALUE method_convert_exit(VALUE self);
|
15
|
+
VALUE method_convert_raw(VALUE self, VALUE in_format, VALUE out_format, VALUE input, VALUE extract_media_path);
|
16
|
+
|
17
|
+
void Init_pandoc_rb() {
|
18
|
+
PandocRb = rb_define_module("PandocRb");
|
19
|
+
rb_define_module_function(PandocRb, "convert_init", method_convert_init, 0);
|
20
|
+
rb_define_module_function(PandocRb, "convert_exit", method_convert_exit, 0);
|
21
|
+
rb_define_module_function(PandocRb, "convert_raw" , method_convert_raw , 4);
|
22
|
+
}
|
23
|
+
|
24
|
+
VALUE method_convert_init(VALUE self) {
|
25
|
+
if (hs_inited) {
|
26
|
+
printf("pandoc_rb warning: Called convert_init after init\n");
|
27
|
+
} else {
|
28
|
+
hs_init(NULL, NULL);
|
29
|
+
hs_inited = 1;
|
30
|
+
}
|
31
|
+
return Qnil;
|
32
|
+
}
|
33
|
+
|
34
|
+
VALUE method_convert_exit(VALUE self) {
|
35
|
+
if (hs_inited) {
|
36
|
+
hs_exit();
|
37
|
+
hs_inited = 0;
|
38
|
+
} else {
|
39
|
+
printf("pandoc_rb warning: Called convert_exit before init\n");
|
40
|
+
}
|
41
|
+
return Qnil;
|
42
|
+
}
|
43
|
+
|
44
|
+
// Only checks whether inputs are strings, has no defaults and niether inits nor exits
|
45
|
+
VALUE method_convert_raw(VALUE self, VALUE in_format, VALUE out_format, VALUE input, VALUE extract_media_path) {
|
46
|
+
char * in_format_ptr;
|
47
|
+
char * out_format_ptr;
|
48
|
+
char * input_ptr;
|
49
|
+
char * extract_media_path_ptr;
|
50
|
+
long in_format_len;
|
51
|
+
long out_format_len;
|
52
|
+
long input_len;
|
53
|
+
long extract_media_path_len;
|
54
|
+
|
55
|
+
void * output_ptr;
|
56
|
+
|
57
|
+
VALUE success;
|
58
|
+
VALUE result_str;
|
59
|
+
VALUE result_pair;
|
60
|
+
|
61
|
+
SafeStringValue(in_format);
|
62
|
+
SafeStringValue(out_format);
|
63
|
+
SafeStringValue(input);
|
64
|
+
SafeStringValue(extract_media_path);
|
65
|
+
in_format_ptr = StringValuePtr(in_format);
|
66
|
+
out_format_ptr = StringValuePtr(out_format);
|
67
|
+
input_ptr = StringValuePtr(input);
|
68
|
+
extract_media_path_ptr = StringValuePtr(extract_media_path);
|
69
|
+
in_format_len = RSTRING_LEN(in_format);
|
70
|
+
out_format_len = RSTRING_LEN(out_format);
|
71
|
+
input_len = RSTRING_LEN(input);
|
72
|
+
extract_media_path_len = RSTRING_LEN(extract_media_path);
|
73
|
+
|
74
|
+
output_ptr = convert_hs(in_format_ptr, in_format_len,
|
75
|
+
out_format_ptr, out_format_len,
|
76
|
+
input_ptr, input_len,
|
77
|
+
extract_media_path_ptr, extract_media_path_len);
|
78
|
+
|
79
|
+
RB_GC_GUARD(in_format);
|
80
|
+
RB_GC_GUARD(out_format);
|
81
|
+
RB_GC_GUARD(input);
|
82
|
+
RB_GC_GUARD(extract_media_path);
|
83
|
+
|
84
|
+
if (result_success(output_ptr)) {
|
85
|
+
success = Qtrue;
|
86
|
+
} else {
|
87
|
+
success = Qfalse;
|
88
|
+
}
|
89
|
+
result_str = rb_utf8_str_new(result_ptr(output_ptr), result_len(output_ptr));
|
90
|
+
|
91
|
+
free_result(output_ptr);
|
92
|
+
|
93
|
+
result_pair = rb_ary_new_from_args(2, success, result_str);
|
94
|
+
|
95
|
+
RB_GC_GUARD(success);
|
96
|
+
RB_GC_GUARD(result_str);
|
97
|
+
|
98
|
+
return result_pair;
|
99
|
+
}
|
100
|
+
|
@@ -0,0 +1,26 @@
|
|
1
|
+
{-# LANGUAGE BangPatterns #-}
|
2
|
+
{-# LANGUAGE DeriveGeneric #-}
|
3
|
+
{-# LANGUAGE FlexibleContexts #-}
|
4
|
+
{-# LANGUAGE ForeignFunctionInterface #-}
|
5
|
+
{-# LANGUAGE RankNTypes #-}
|
6
|
+
{-# LANGUAGE StandaloneDeriving #-}
|
7
|
+
{-# LANGUAGE TupleSections #-}
|
8
|
+
{-# LANGUAGE ScopedTypeVariables #-}
|
9
|
+
|
10
|
+
module PandocRb where
|
11
|
+
|
12
|
+
import Foreign.C.Types (CChar, CLong(..), CInt(..))
|
13
|
+
import Foreign.Ptr (Ptr)
|
14
|
+
import Text.Pandoc.C (convert_hs, result_success, result_ptr, result_len, free_result)
|
15
|
+
|
16
|
+
|
17
|
+
foreign export ccall convert_hs :: Ptr CChar -> CLong -> Ptr CChar -> CLong -> Ptr CChar -> CLong -> Ptr CChar -> CLong -> IO (Ptr (CInt, Ptr CChar, CLong))
|
18
|
+
|
19
|
+
foreign export ccall result_success :: Ptr (CInt, Ptr CChar, CLong) -> IO CInt
|
20
|
+
|
21
|
+
foreign export ccall result_ptr :: Ptr (CInt, Ptr CChar, CLong) -> IO (Ptr CChar)
|
22
|
+
|
23
|
+
foreign export ccall result_len :: Ptr (CInt, Ptr CChar, CLong) -> IO CLong
|
24
|
+
|
25
|
+
foreign export ccall free_result :: Ptr (CInt, Ptr CChar, CLong) -> IO ()
|
26
|
+
|
@@ -0,0 +1,147 @@
|
|
1
|
+
{-# LANGUAGE BangPatterns #-}
|
2
|
+
{-# LANGUAGE DeriveGeneric #-}
|
3
|
+
{-# LANGUAGE FlexibleContexts #-}
|
4
|
+
{-# LANGUAGE ForeignFunctionInterface #-}
|
5
|
+
{-# LANGUAGE RankNTypes #-}
|
6
|
+
{-# LANGUAGE StandaloneDeriving #-}
|
7
|
+
{-# LANGUAGE TupleSections #-}
|
8
|
+
{-# LANGUAGE ScopedTypeVariables #-}
|
9
|
+
|
10
|
+
module Text.Pandoc.C where
|
11
|
+
|
12
|
+
import Control.Monad.Trans.Class (lift)
|
13
|
+
import Control.Monad.Trans.Either (EitherT(..))
|
14
|
+
import Data.Bifunctor (bimap, second)
|
15
|
+
import Data.ByteString (useAsCStringLen)
|
16
|
+
import Data.ByteString.Lazy (toStrict, fromStrict)
|
17
|
+
import Data.ByteString.Unsafe (unsafePackCStringLen)
|
18
|
+
import Data.Monoid ((<>))
|
19
|
+
import Foreign.C.String (CStringLen, peekCStringLen, newCStringLen)
|
20
|
+
import Foreign.C.Types (CChar, CLong(..), CInt(..))
|
21
|
+
import Foreign.Marshal.Alloc (malloc, free)
|
22
|
+
import Foreign.Ptr (Ptr)
|
23
|
+
import Foreign.Storable (peek, poke)
|
24
|
+
import Foreign.Storable.Tuple ()
|
25
|
+
import Text.Pandoc (ReaderOptions, WriterOptions(..), Reader(..), Writer(..), HTMLMathMethod(..), writerMediaBag, getReader, getWriter, def)
|
26
|
+
|
27
|
+
import Text.Pandoc.C.Utils
|
28
|
+
import Text.Pandoc.C.Types
|
29
|
+
|
30
|
+
|
31
|
+
-- | Convert a `Reader` to a `CReader`
|
32
|
+
mkCReader :: Reader -> CReader
|
33
|
+
mkCReader !(StringReader reader) !opts !cstr = mapML showPandocError . EitherT $ do
|
34
|
+
str <- peekCStringLen cstr
|
35
|
+
readerResult <- reader opts str
|
36
|
+
return $ second (, mempty) $! readerResult
|
37
|
+
mkCReader ~(ByteStringReader reader) !opts str = mapML showPandocError . EitherT $ do
|
38
|
+
bs <- unsafePackCStringLen str
|
39
|
+
readerResult <- reader opts $! fromStrict bs
|
40
|
+
return readerResult
|
41
|
+
|
42
|
+
|
43
|
+
-- | Convert a `Writer` to a `CWriter`
|
44
|
+
mkCWriter :: Writer -> CWriter
|
45
|
+
mkCWriter !(PureStringWriter writer) !opts !pandoc = lift $ newCStringLen (writer opts pandoc)
|
46
|
+
mkCWriter !(IOStringWriter writer) !opts !pandoc = lift $ writer opts pandoc >>= newCStringLen
|
47
|
+
mkCWriter !(IOByteStringWriter writer) !opts !pandoc = lift $ writer opts pandoc >>= flip useAsCStringLen return . toStrict
|
48
|
+
|
49
|
+
|
50
|
+
-- | `getReader` for foreign C exports. Retrieve reader based on formatSpec (format+extensions).
|
51
|
+
getCReader :: CStringLen -> EitherT CStringLen IO CReader
|
52
|
+
getCReader !cstr = do
|
53
|
+
str <- lift . peekCStringLen $! cstr
|
54
|
+
EitherT $ case getReader str of
|
55
|
+
( Left err ) -> Left <$> newCStringLen err
|
56
|
+
~(Right reader) -> return . Right $! mkCReader reader
|
57
|
+
|
58
|
+
-- | `getWriter` for foreign C exports. Retrieve writer based on formatSpec (format+extensions).
|
59
|
+
getCWriter :: CStringLen -> EitherT CStringLen IO CWriter
|
60
|
+
getCWriter !cstr = do
|
61
|
+
str <- lift $ peekCStringLen cstr
|
62
|
+
EitherT $ case getWriter str of
|
63
|
+
( Left err ) -> Left <$> newCStringLen err
|
64
|
+
~(Right writer) -> return . Right $! mkCWriter writer
|
65
|
+
|
66
|
+
|
67
|
+
-- | The main conversion function.
|
68
|
+
-- Takes `ReaderOptions`, `WriterOptions`, reader's formatSpec,
|
69
|
+
-- writer's formatSpec, input and returns either an error string
|
70
|
+
-- or the output
|
71
|
+
convert :: ReaderOptions
|
72
|
+
-> WriterOptions
|
73
|
+
-> CStringLen -- ^ Reader specification
|
74
|
+
-> CStringLen -- ^ Writer specification
|
75
|
+
-> CStringLen -- ^ Input
|
76
|
+
-> CStringLen -- ^ Extract media dir
|
77
|
+
-> EitherT CStringLen IO CStringLen -- ^ Either an error string or the output
|
78
|
+
convert !readerOpts !writerOpts !readerStr !writerStr !input !mediaBagStr = do
|
79
|
+
reader <- getCReader $! readerStr
|
80
|
+
(pandoc, mediaBag) <- reader readerOpts $! input
|
81
|
+
pandoc' <- extractCMediabag mediaBagStr mediaBag pandoc
|
82
|
+
writer <- getCWriter $! writerStr
|
83
|
+
let writerOpts' = writerOpts { writerMediaBag = writerMediaBag writerOpts <> mediaBag }
|
84
|
+
writer writerOpts' pandoc'
|
85
|
+
|
86
|
+
|
87
|
+
-- | The `ReaderOptions` wired into the foreign export
|
88
|
+
readerOptions :: ReaderOptions
|
89
|
+
readerOptions = def
|
90
|
+
|
91
|
+
-- | The `WriterOptions` wired into the foreign export
|
92
|
+
writerOptions :: WriterOptions
|
93
|
+
writerOptions = def { writerHTMLMathMethod = MathJax "https://fail.cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/MathJax.js?config=TeX-AMS-MML_HTMLorMML" }
|
94
|
+
|
95
|
+
|
96
|
+
-- | Takes: reader, writer, input and returns (isSuccess, output pointer, output length)
|
97
|
+
convert_hs :: Ptr CChar -- ^ reader format pointer
|
98
|
+
-> CLong -- ^ reader format length
|
99
|
+
-> Ptr CChar -- ^ writer format pointer
|
100
|
+
-> CLong -- ^ writer format length
|
101
|
+
-> Ptr CChar -- ^ input pointer
|
102
|
+
-> CLong -- ^ input length
|
103
|
+
-> Ptr CChar -- ^ media extraction path pointer
|
104
|
+
-> CLong -- ^ media extraction path length
|
105
|
+
-> IO (Ptr (CInt, Ptr CChar, CLong)) -- ^ Pointer to @(success, output pointer, output length)@
|
106
|
+
convert_hs !readerStr !readerLen !writerStr !writerLen !input !inputLen !mediaBagStr !mediaBagLen = do
|
107
|
+
let readerStr' = (readerStr , fromEnum readerLen )
|
108
|
+
let writerStr' = (writerStr , fromEnum writerLen )
|
109
|
+
let input' = (input , fromEnum inputLen )
|
110
|
+
let mediaBagStr' = (mediaBagStr, fromEnum mediaBagLen)
|
111
|
+
let converted = convert readerOptions writerOptions readerStr' writerStr' input' mediaBagStr'
|
112
|
+
let EitherT safeConverted = timeoutEitherT (10 * {- minutes -} 60000000) converted
|
113
|
+
result <- bimap (second toEnum) (second toEnum) <$> safeConverted
|
114
|
+
let (success, (rstrPtr, rstrLen)) = either (0,) (1,) result
|
115
|
+
addr <- malloc
|
116
|
+
addr `poke` (success, rstrPtr, rstrLen)
|
117
|
+
return addr
|
118
|
+
|
119
|
+
|
120
|
+
-- | Extract the success value from a result pointer
|
121
|
+
result_success :: Ptr (CInt, Ptr CChar, CLong) -> IO CInt
|
122
|
+
result_success addr = do
|
123
|
+
(success, _, _) <- peek addr
|
124
|
+
return success
|
125
|
+
|
126
|
+
-- | Extract the pointer to the result string from a result pointer
|
127
|
+
result_ptr :: Ptr (CInt, Ptr CChar, CLong) -> IO (Ptr CChar)
|
128
|
+
result_ptr addr = do
|
129
|
+
(_, ptr, _) <- peek addr
|
130
|
+
return ptr
|
131
|
+
|
132
|
+
-- | Extract the length (in bytes) of the result string from a result pointer
|
133
|
+
result_len :: Ptr (CInt, Ptr CChar, CLong) -> IO CLong
|
134
|
+
result_len addr = do
|
135
|
+
(_, _, len) <- peek addr
|
136
|
+
return len
|
137
|
+
|
138
|
+
-- | Free a result pointer. Note: This only frees the struct that wraps the
|
139
|
+
-- result values, it does not free the result string's memory
|
140
|
+
free_result :: Ptr (CInt, Ptr CChar, CLong) -> IO ()
|
141
|
+
free_result addr = do
|
142
|
+
-- (_, ptr, _) <- peek addr -- this is commented out because the caller manages it
|
143
|
+
-- free ptr
|
144
|
+
free addr
|
145
|
+
return ()
|
146
|
+
|
147
|
+
|
@@ -0,0 +1,73 @@
|
|
1
|
+
{-# LANGUAGE BangPatterns #-}
|
2
|
+
{-# LANGUAGE DeriveGeneric #-}
|
3
|
+
{-# LANGUAGE FlexibleContexts #-}
|
4
|
+
{-# LANGUAGE ForeignFunctionInterface #-}
|
5
|
+
{-# LANGUAGE RankNTypes #-}
|
6
|
+
{-# LANGUAGE StandaloneDeriving #-}
|
7
|
+
{-# LANGUAGE TupleSections #-}
|
8
|
+
{-# LANGUAGE ScopedTypeVariables #-}
|
9
|
+
|
10
|
+
module Text.Pandoc.C.Types where
|
11
|
+
|
12
|
+
import Control.Monad.Trans.Either (EitherT(..))
|
13
|
+
import Data.Aeson (ToJSON(..), genericToEncoding, defaultOptions, genericToJSON, encode)
|
14
|
+
import Data.ByteString.Lazy (toStrict)
|
15
|
+
import Data.ByteString (useAsCStringLen)
|
16
|
+
import Foreign.C.String (CStringLen)
|
17
|
+
import Foreign.C.Types (CChar, CLong(..))
|
18
|
+
import Foreign.Ptr (Ptr)
|
19
|
+
import GHC.Generics (Generic)
|
20
|
+
import Text.Pandoc (ReaderOptions, Pandoc, WriterOptions(..), PandocError(..))
|
21
|
+
import Text.Pandoc.MediaBag (MediaBag)
|
22
|
+
import Text.Parsec.Error (ParseError, Message(..), errorPos, errorMessages)
|
23
|
+
import Text.Parsec.Pos (SourcePos, SourceName, Line, Column, sourceName, sourceLine, sourceColumn)
|
24
|
+
|
25
|
+
|
26
|
+
-- | The `Reader` type modified for foreign C exports
|
27
|
+
type CReader = ReaderOptions -> CStringLen -> EitherT CStringLen IO (Pandoc, MediaBag)
|
28
|
+
|
29
|
+
-- | The `Writer` type modified for foreign C exports
|
30
|
+
type CWriter = WriterOptions -> Pandoc -> EitherT CStringLen IO CStringLen
|
31
|
+
|
32
|
+
|
33
|
+
-- | A clone of `SourcePos`, to allow deriving `Generic`
|
34
|
+
data SourcePos' = SourcePos' SourceName !Line !Column deriving (Eq, Ord, Show, Generic)
|
35
|
+
|
36
|
+
instance ToJSON SourcePos' where
|
37
|
+
toEncoding = genericToEncoding defaultOptions
|
38
|
+
|
39
|
+
deriving instance Generic Message
|
40
|
+
|
41
|
+
instance ToJSON Message where
|
42
|
+
toEncoding = genericToEncoding defaultOptions
|
43
|
+
|
44
|
+
-- | A clone of `ParseError`, to allow deriving `Generic`
|
45
|
+
data ParseError' = ParseError' !SourcePos' [Message] deriving (Generic)
|
46
|
+
|
47
|
+
-- | Convert to our clone of `SourcePos`
|
48
|
+
sourcePos' :: SourcePos -> SourcePos'
|
49
|
+
sourcePos' !sp = SourcePos' (sourceName sp) (sourceLine sp) (sourceColumn sp)
|
50
|
+
|
51
|
+
-- | Convert to our clone of `ParseError`
|
52
|
+
parseError' :: ParseError -> ParseError'
|
53
|
+
parseError' !pe = ParseError' (sourcePos' (errorPos pe)) (errorMessages pe)
|
54
|
+
|
55
|
+
|
56
|
+
instance ToJSON ParseError where
|
57
|
+
toJSON = genericToJSON defaultOptions . parseError'
|
58
|
+
toEncoding = genericToEncoding defaultOptions . parseError'
|
59
|
+
|
60
|
+
instance ToJSON PandocError where
|
61
|
+
toEncoding = genericToEncoding defaultOptions
|
62
|
+
|
63
|
+
-- | Convert a `PandocError` to JSON
|
64
|
+
showPandocError :: PandocError -> IO CStringLen
|
65
|
+
showPandocError = flip useAsCStringLen return . toStrict . encode
|
66
|
+
|
67
|
+
|
68
|
+
-- | `CStringLen` for use by Ruby
|
69
|
+
type RStringLen = (Ptr CChar, CLong)
|
70
|
+
|
71
|
+
|
72
|
+
|
73
|
+
|
@@ -0,0 +1,78 @@
|
|
1
|
+
{-# LANGUAGE BangPatterns #-}
|
2
|
+
{-# LANGUAGE DeriveGeneric #-}
|
3
|
+
{-# LANGUAGE FlexibleContexts #-}
|
4
|
+
{-# LANGUAGE ForeignFunctionInterface #-}
|
5
|
+
{-# LANGUAGE RankNTypes #-}
|
6
|
+
{-# LANGUAGE StandaloneDeriving #-}
|
7
|
+
{-# LANGUAGE TupleSections #-}
|
8
|
+
{-# LANGUAGE ScopedTypeVariables #-}
|
9
|
+
|
10
|
+
module Text.Pandoc.C.Utils where
|
11
|
+
|
12
|
+
import Control.Concurrent
|
13
|
+
import Control.Monad.Trans.Class (lift)
|
14
|
+
import Control.Monad.Trans.Either (EitherT(..))
|
15
|
+
import Foreign.C.String (CStringLen, peekCStringLen, newCStringLen)
|
16
|
+
import System.Timeout (timeout)
|
17
|
+
import Text.Pandoc (Pandoc, Inline(..))
|
18
|
+
import Text.Pandoc.MediaBag (MediaBag, extractMediaBag, mediaDirectory)
|
19
|
+
import Text.Pandoc.Walk (walk)
|
20
|
+
|
21
|
+
|
22
|
+
-- | Apply a monadic action to the left of an `EitherT`
|
23
|
+
mapML :: Monad m => (a -> m b) -> EitherT a m c -> EitherT b m c
|
24
|
+
mapML !f !(EitherT m) = EitherT $ do
|
25
|
+
m' <- m
|
26
|
+
case m' of
|
27
|
+
( Left x) -> Left <$> f x
|
28
|
+
~(Right y) -> return $ Right y
|
29
|
+
|
30
|
+
-- | Apply a monadic action to the right of an `EitherT`
|
31
|
+
mapMR :: Monad m => (b -> m c) -> EitherT a m b -> EitherT a m c
|
32
|
+
mapMR !f !(EitherT m) = EitherT $ do
|
33
|
+
m' <- m
|
34
|
+
case m' of
|
35
|
+
( Left x) -> return $ Left x
|
36
|
+
~(Right y) -> Right <$> f y
|
37
|
+
|
38
|
+
|
39
|
+
-- | This function forks a thread to both catch exceptions and better kill infinite loops that can't be caught with `timeout` alone.
|
40
|
+
-- For example, @timeout 10 (forever $ return ())@ will never time out, but @timeoutEitherT 10 (lift $ forever $ return ())@ will.
|
41
|
+
--
|
42
|
+
-- timeoutEitherT is strict within the created thread
|
43
|
+
timeoutEitherT :: Int -> EitherT CStringLen IO b -> EitherT CStringLen IO b
|
44
|
+
timeoutEitherT !us !(EitherT io) = EitherT $ do
|
45
|
+
mvar <- newEmptyMVar
|
46
|
+
tid <- io `forkFinally` (putMVar mvar $!)
|
47
|
+
result <- timeout us (takeMVar mvar)
|
48
|
+
case result of
|
49
|
+
( Nothing ) -> killThread tid >> Left <$> newCStringLen ("Pandoc timed out after " ++ show us ++ "microseconds")
|
50
|
+
~(Just noTimeout) -> case noTimeout of
|
51
|
+
( Right successful) -> return successful
|
52
|
+
~(Left err ) -> Left <$> newCStringLen ("Pandoc internal error: " ++ show err)
|
53
|
+
|
54
|
+
|
55
|
+
-- | Taken from pandoc's executable, adjust the paths and extract the media to the given directory
|
56
|
+
-- https://github.com/jgm/pandoc/blob/9849ba7fd744f529f063e0802a18fa18c8433eeb/src/Text/Pandoc/Class.hs#L385
|
57
|
+
extractMedia :: MediaBag -> FilePath -> Pandoc -> IO Pandoc
|
58
|
+
extractMedia media dir d =
|
59
|
+
case [fp | (fp, _, _) <- mediaDirectory media] of
|
60
|
+
[] -> return d
|
61
|
+
fps -> do
|
62
|
+
extractMediaBag True dir media
|
63
|
+
return $ walk (adjustImagePath dir fps) d
|
64
|
+
|
65
|
+
-- | Also taken from pandoc's executable, adjust the image paths of inline elements
|
66
|
+
adjustImagePath :: FilePath -> [FilePath] -> Inline -> Inline
|
67
|
+
adjustImagePath dir paths (Image attr lab (src, tit))
|
68
|
+
| src `elem` paths = Image attr lab (dir ++ "/" ++ src, tit)
|
69
|
+
adjustImagePath _ _ x = x
|
70
|
+
|
71
|
+
-- | Extract the media bag to the given directory, unless that directory string is empty
|
72
|
+
extractCMediabag :: CStringLen -> MediaBag -> Pandoc -> EitherT CStringLen IO Pandoc
|
73
|
+
extractCMediabag !(_, 0) _ !pandoc = return pandoc
|
74
|
+
extractCMediabag !cPath !mediaBag !pandoc = do
|
75
|
+
nonEmptyPath <- lift $ peekCStringLen cPath
|
76
|
+
lift $ extractMedia mediaBag nonEmptyPath pandoc
|
77
|
+
|
78
|
+
|