pandoc_rb 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +45 -0
- data/.ruby-gemset +1 -0
- data/.ruby-version +1 -0
- data/.travis.yml +24 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +48 -0
- data/LICENSE +30 -0
- data/LICENSE.txt +31 -0
- data/README.md +83 -0
- data/Rakefile +31 -0
- data/Setup.hs +2 -0
- data/bench/bench.rb +69 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/ext/pandoc_rb/extconf.rb +130 -0
- data/ext/pandoc_rb/pandoc-rb.cabal +74 -0
- data/ext/pandoc_rb/pandoc_rb.c +100 -0
- data/ext/pandoc_rb/src/PandocRb.hs +26 -0
- data/ext/pandoc_rb/src/Text/Pandoc/C.hs +147 -0
- data/ext/pandoc_rb/src/Text/Pandoc/C/Types.hs +73 -0
- data/ext/pandoc_rb/src/Text/Pandoc/C/Utils.hs +78 -0
- data/ext/pandoc_rb/stack.yaml +70 -0
- data/ext/pandoc_rb/test/DetailedSpec.hs +173 -0
- data/lib/pandoc_rb.rb +96 -0
- data/lib/pandoc_rb/error.rb +6 -0
- data/lib/pandoc_rb/parse_failure.rb +11 -0
- data/lib/pandoc_rb/parsec_error.rb +13 -0
- data/lib/pandoc_rb/readers.rb +27 -0
- data/lib/pandoc_rb/version.rb +5 -0
- data/lib/pandoc_rb/writers.rb +46 -0
- data/pandoc_rb.gemspec +37 -0
- data/src/Text/Pandoc/C.hs +217 -0
- data/src/Text/Pandoc/C_stub.h +9 -0
- metadata +206 -0
@@ -0,0 +1,74 @@
|
|
1
|
+
name: pandoc-rb
|
2
|
+
version: 0.1.0.0
|
3
|
+
synopsis: Call pandoc from ruby using the FFI
|
4
|
+
description: This package provides C exports made for use by Ruby for calling Pandoc
|
5
|
+
homepage: https://github.com/michaeljklein/pandoc_rb
|
6
|
+
license: BSD3
|
7
|
+
author: Michael Klein
|
8
|
+
maintainer: lambdamichael@gmail.com
|
9
|
+
category: FFI
|
10
|
+
build-type: Simple
|
11
|
+
cabal-version: >=1.10
|
12
|
+
|
13
|
+
executable PandocRb.dylib
|
14
|
+
main-is: PandocRb.hs
|
15
|
+
other-extensions: ForeignFunctionInterface
|
16
|
+
build-depends: base >= 4.7 && < 5
|
17
|
+
, pandoc-rb
|
18
|
+
, bytestring
|
19
|
+
, either
|
20
|
+
, pandoc
|
21
|
+
, pandoc-types
|
22
|
+
, storable-tuple
|
23
|
+
, transformers
|
24
|
+
, aeson
|
25
|
+
, parsec
|
26
|
+
hs-source-dirs: src
|
27
|
+
other-modules: Text.Pandoc.C
|
28
|
+
, Text.Pandoc.C.Types
|
29
|
+
, Text.Pandoc.C.Utils
|
30
|
+
default-language: Haskell2010
|
31
|
+
include-dirs: src
|
32
|
+
ghc-options: -O3 -shared -fPIC -dynamic -Wall
|
33
|
+
extra-libraries: HSrts-ghc8.0.2
|
34
|
+
|
35
|
+
|
36
|
+
library
|
37
|
+
hs-source-dirs: src
|
38
|
+
exposed-modules: Text.Pandoc.C
|
39
|
+
, Text.Pandoc.C.Types
|
40
|
+
, Text.Pandoc.C.Utils
|
41
|
+
build-depends: base >= 4.7 && < 5
|
42
|
+
, bytestring
|
43
|
+
, either
|
44
|
+
, pandoc == 1.19.2.1337
|
45
|
+
, pandoc-types
|
46
|
+
, storable-tuple
|
47
|
+
, transformers
|
48
|
+
, aeson
|
49
|
+
, parsec
|
50
|
+
ghc-options: -O3 -Wall
|
51
|
+
default-language: Haskell2010
|
52
|
+
|
53
|
+
test-suite pandoc-rb-test
|
54
|
+
type: detailed-0.9
|
55
|
+
hs-source-dirs: test
|
56
|
+
test-module: DetailedSpec
|
57
|
+
build-depends: base >= 4.7 && < 5
|
58
|
+
, Cabal >= 1.20.0
|
59
|
+
, QuickCheck
|
60
|
+
, bytestring
|
61
|
+
, either
|
62
|
+
, pandoc == 1.19.2.1337
|
63
|
+
, pandoc-types
|
64
|
+
, pandoc-rb
|
65
|
+
, storable-tuple
|
66
|
+
, transformers
|
67
|
+
ghc-options: -threaded -rtsopts -with-rtsopts=-N
|
68
|
+
default-language: Haskell2010
|
69
|
+
|
70
|
+
source-repository head
|
71
|
+
type: git
|
72
|
+
location: https://github.com/michaeljklein/pandoc_rb
|
73
|
+
|
74
|
+
|
@@ -0,0 +1,100 @@
|
|
1
|
+
#include <stdio.h>
|
2
|
+
#include "ruby.h"
|
3
|
+
#include "HsFFI.h"
|
4
|
+
#include "PandocRb_stub.h"
|
5
|
+
|
6
|
+
VALUE PandocRb = Qnil;
|
7
|
+
|
8
|
+
void Init_pandoc_rb();
|
9
|
+
|
10
|
+
// a global designating whether the haskell runtime has been initialized
|
11
|
+
int hs_inited = 0;
|
12
|
+
|
13
|
+
VALUE method_convert_init(VALUE self);
|
14
|
+
VALUE method_convert_exit(VALUE self);
|
15
|
+
VALUE method_convert_raw(VALUE self, VALUE in_format, VALUE out_format, VALUE input, VALUE extract_media_path);
|
16
|
+
|
17
|
+
void Init_pandoc_rb() {
|
18
|
+
PandocRb = rb_define_module("PandocRb");
|
19
|
+
rb_define_module_function(PandocRb, "convert_init", method_convert_init, 0);
|
20
|
+
rb_define_module_function(PandocRb, "convert_exit", method_convert_exit, 0);
|
21
|
+
rb_define_module_function(PandocRb, "convert_raw" , method_convert_raw , 4);
|
22
|
+
}
|
23
|
+
|
24
|
+
VALUE method_convert_init(VALUE self) {
|
25
|
+
if (hs_inited) {
|
26
|
+
printf("pandoc_rb warning: Called convert_init after init\n");
|
27
|
+
} else {
|
28
|
+
hs_init(NULL, NULL);
|
29
|
+
hs_inited = 1;
|
30
|
+
}
|
31
|
+
return Qnil;
|
32
|
+
}
|
33
|
+
|
34
|
+
VALUE method_convert_exit(VALUE self) {
|
35
|
+
if (hs_inited) {
|
36
|
+
hs_exit();
|
37
|
+
hs_inited = 0;
|
38
|
+
} else {
|
39
|
+
printf("pandoc_rb warning: Called convert_exit before init\n");
|
40
|
+
}
|
41
|
+
return Qnil;
|
42
|
+
}
|
43
|
+
|
44
|
+
// Only checks whether inputs are strings, has no defaults and niether inits nor exits
|
45
|
+
VALUE method_convert_raw(VALUE self, VALUE in_format, VALUE out_format, VALUE input, VALUE extract_media_path) {
|
46
|
+
char * in_format_ptr;
|
47
|
+
char * out_format_ptr;
|
48
|
+
char * input_ptr;
|
49
|
+
char * extract_media_path_ptr;
|
50
|
+
long in_format_len;
|
51
|
+
long out_format_len;
|
52
|
+
long input_len;
|
53
|
+
long extract_media_path_len;
|
54
|
+
|
55
|
+
void * output_ptr;
|
56
|
+
|
57
|
+
VALUE success;
|
58
|
+
VALUE result_str;
|
59
|
+
VALUE result_pair;
|
60
|
+
|
61
|
+
SafeStringValue(in_format);
|
62
|
+
SafeStringValue(out_format);
|
63
|
+
SafeStringValue(input);
|
64
|
+
SafeStringValue(extract_media_path);
|
65
|
+
in_format_ptr = StringValuePtr(in_format);
|
66
|
+
out_format_ptr = StringValuePtr(out_format);
|
67
|
+
input_ptr = StringValuePtr(input);
|
68
|
+
extract_media_path_ptr = StringValuePtr(extract_media_path);
|
69
|
+
in_format_len = RSTRING_LEN(in_format);
|
70
|
+
out_format_len = RSTRING_LEN(out_format);
|
71
|
+
input_len = RSTRING_LEN(input);
|
72
|
+
extract_media_path_len = RSTRING_LEN(extract_media_path);
|
73
|
+
|
74
|
+
output_ptr = convert_hs(in_format_ptr, in_format_len,
|
75
|
+
out_format_ptr, out_format_len,
|
76
|
+
input_ptr, input_len,
|
77
|
+
extract_media_path_ptr, extract_media_path_len);
|
78
|
+
|
79
|
+
RB_GC_GUARD(in_format);
|
80
|
+
RB_GC_GUARD(out_format);
|
81
|
+
RB_GC_GUARD(input);
|
82
|
+
RB_GC_GUARD(extract_media_path);
|
83
|
+
|
84
|
+
if (result_success(output_ptr)) {
|
85
|
+
success = Qtrue;
|
86
|
+
} else {
|
87
|
+
success = Qfalse;
|
88
|
+
}
|
89
|
+
result_str = rb_utf8_str_new(result_ptr(output_ptr), result_len(output_ptr));
|
90
|
+
|
91
|
+
free_result(output_ptr);
|
92
|
+
|
93
|
+
result_pair = rb_ary_new_from_args(2, success, result_str);
|
94
|
+
|
95
|
+
RB_GC_GUARD(success);
|
96
|
+
RB_GC_GUARD(result_str);
|
97
|
+
|
98
|
+
return result_pair;
|
99
|
+
}
|
100
|
+
|
@@ -0,0 +1,26 @@
|
|
1
|
+
{-# LANGUAGE BangPatterns #-}
|
2
|
+
{-# LANGUAGE DeriveGeneric #-}
|
3
|
+
{-# LANGUAGE FlexibleContexts #-}
|
4
|
+
{-# LANGUAGE ForeignFunctionInterface #-}
|
5
|
+
{-# LANGUAGE RankNTypes #-}
|
6
|
+
{-# LANGUAGE StandaloneDeriving #-}
|
7
|
+
{-# LANGUAGE TupleSections #-}
|
8
|
+
{-# LANGUAGE ScopedTypeVariables #-}
|
9
|
+
|
10
|
+
module PandocRb where
|
11
|
+
|
12
|
+
import Foreign.C.Types (CChar, CLong(..), CInt(..))
|
13
|
+
import Foreign.Ptr (Ptr)
|
14
|
+
import Text.Pandoc.C (convert_hs, result_success, result_ptr, result_len, free_result)
|
15
|
+
|
16
|
+
|
17
|
+
foreign export ccall convert_hs :: Ptr CChar -> CLong -> Ptr CChar -> CLong -> Ptr CChar -> CLong -> Ptr CChar -> CLong -> IO (Ptr (CInt, Ptr CChar, CLong))
|
18
|
+
|
19
|
+
foreign export ccall result_success :: Ptr (CInt, Ptr CChar, CLong) -> IO CInt
|
20
|
+
|
21
|
+
foreign export ccall result_ptr :: Ptr (CInt, Ptr CChar, CLong) -> IO (Ptr CChar)
|
22
|
+
|
23
|
+
foreign export ccall result_len :: Ptr (CInt, Ptr CChar, CLong) -> IO CLong
|
24
|
+
|
25
|
+
foreign export ccall free_result :: Ptr (CInt, Ptr CChar, CLong) -> IO ()
|
26
|
+
|
@@ -0,0 +1,147 @@
|
|
1
|
+
{-# LANGUAGE BangPatterns #-}
|
2
|
+
{-# LANGUAGE DeriveGeneric #-}
|
3
|
+
{-# LANGUAGE FlexibleContexts #-}
|
4
|
+
{-# LANGUAGE ForeignFunctionInterface #-}
|
5
|
+
{-# LANGUAGE RankNTypes #-}
|
6
|
+
{-# LANGUAGE StandaloneDeriving #-}
|
7
|
+
{-# LANGUAGE TupleSections #-}
|
8
|
+
{-# LANGUAGE ScopedTypeVariables #-}
|
9
|
+
|
10
|
+
module Text.Pandoc.C where
|
11
|
+
|
12
|
+
import Control.Monad.Trans.Class (lift)
|
13
|
+
import Control.Monad.Trans.Either (EitherT(..))
|
14
|
+
import Data.Bifunctor (bimap, second)
|
15
|
+
import Data.ByteString (useAsCStringLen)
|
16
|
+
import Data.ByteString.Lazy (toStrict, fromStrict)
|
17
|
+
import Data.ByteString.Unsafe (unsafePackCStringLen)
|
18
|
+
import Data.Monoid ((<>))
|
19
|
+
import Foreign.C.String (CStringLen, peekCStringLen, newCStringLen)
|
20
|
+
import Foreign.C.Types (CChar, CLong(..), CInt(..))
|
21
|
+
import Foreign.Marshal.Alloc (malloc, free)
|
22
|
+
import Foreign.Ptr (Ptr)
|
23
|
+
import Foreign.Storable (peek, poke)
|
24
|
+
import Foreign.Storable.Tuple ()
|
25
|
+
import Text.Pandoc (ReaderOptions, WriterOptions(..), Reader(..), Writer(..), HTMLMathMethod(..), writerMediaBag, getReader, getWriter, def)
|
26
|
+
|
27
|
+
import Text.Pandoc.C.Utils
|
28
|
+
import Text.Pandoc.C.Types
|
29
|
+
|
30
|
+
|
31
|
+
-- | Convert a `Reader` to a `CReader`
|
32
|
+
mkCReader :: Reader -> CReader
|
33
|
+
mkCReader !(StringReader reader) !opts !cstr = mapML showPandocError . EitherT $ do
|
34
|
+
str <- peekCStringLen cstr
|
35
|
+
readerResult <- reader opts str
|
36
|
+
return $ second (, mempty) $! readerResult
|
37
|
+
mkCReader ~(ByteStringReader reader) !opts str = mapML showPandocError . EitherT $ do
|
38
|
+
bs <- unsafePackCStringLen str
|
39
|
+
readerResult <- reader opts $! fromStrict bs
|
40
|
+
return readerResult
|
41
|
+
|
42
|
+
|
43
|
+
-- | Convert a `Writer` to a `CWriter`
|
44
|
+
mkCWriter :: Writer -> CWriter
|
45
|
+
mkCWriter !(PureStringWriter writer) !opts !pandoc = lift $ newCStringLen (writer opts pandoc)
|
46
|
+
mkCWriter !(IOStringWriter writer) !opts !pandoc = lift $ writer opts pandoc >>= newCStringLen
|
47
|
+
mkCWriter !(IOByteStringWriter writer) !opts !pandoc = lift $ writer opts pandoc >>= flip useAsCStringLen return . toStrict
|
48
|
+
|
49
|
+
|
50
|
+
-- | `getReader` for foreign C exports. Retrieve reader based on formatSpec (format+extensions).
|
51
|
+
getCReader :: CStringLen -> EitherT CStringLen IO CReader
|
52
|
+
getCReader !cstr = do
|
53
|
+
str <- lift . peekCStringLen $! cstr
|
54
|
+
EitherT $ case getReader str of
|
55
|
+
( Left err ) -> Left <$> newCStringLen err
|
56
|
+
~(Right reader) -> return . Right $! mkCReader reader
|
57
|
+
|
58
|
+
-- | `getWriter` for foreign C exports. Retrieve writer based on formatSpec (format+extensions).
|
59
|
+
getCWriter :: CStringLen -> EitherT CStringLen IO CWriter
|
60
|
+
getCWriter !cstr = do
|
61
|
+
str <- lift $ peekCStringLen cstr
|
62
|
+
EitherT $ case getWriter str of
|
63
|
+
( Left err ) -> Left <$> newCStringLen err
|
64
|
+
~(Right writer) -> return . Right $! mkCWriter writer
|
65
|
+
|
66
|
+
|
67
|
+
-- | The main conversion function.
|
68
|
+
-- Takes `ReaderOptions`, `WriterOptions`, reader's formatSpec,
|
69
|
+
-- writer's formatSpec, input and returns either an error string
|
70
|
+
-- or the output
|
71
|
+
convert :: ReaderOptions
|
72
|
+
-> WriterOptions
|
73
|
+
-> CStringLen -- ^ Reader specification
|
74
|
+
-> CStringLen -- ^ Writer specification
|
75
|
+
-> CStringLen -- ^ Input
|
76
|
+
-> CStringLen -- ^ Extract media dir
|
77
|
+
-> EitherT CStringLen IO CStringLen -- ^ Either an error string or the output
|
78
|
+
convert !readerOpts !writerOpts !readerStr !writerStr !input !mediaBagStr = do
|
79
|
+
reader <- getCReader $! readerStr
|
80
|
+
(pandoc, mediaBag) <- reader readerOpts $! input
|
81
|
+
pandoc' <- extractCMediabag mediaBagStr mediaBag pandoc
|
82
|
+
writer <- getCWriter $! writerStr
|
83
|
+
let writerOpts' = writerOpts { writerMediaBag = writerMediaBag writerOpts <> mediaBag }
|
84
|
+
writer writerOpts' pandoc'
|
85
|
+
|
86
|
+
|
87
|
+
-- | The `ReaderOptions` wired into the foreign export
|
88
|
+
readerOptions :: ReaderOptions
|
89
|
+
readerOptions = def
|
90
|
+
|
91
|
+
-- | The `WriterOptions` wired into the foreign export
|
92
|
+
writerOptions :: WriterOptions
|
93
|
+
writerOptions = def { writerHTMLMathMethod = MathJax "https://fail.cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/MathJax.js?config=TeX-AMS-MML_HTMLorMML" }
|
94
|
+
|
95
|
+
|
96
|
+
-- | Takes: reader, writer, input and returns (isSuccess, output pointer, output length)
|
97
|
+
convert_hs :: Ptr CChar -- ^ reader format pointer
|
98
|
+
-> CLong -- ^ reader format length
|
99
|
+
-> Ptr CChar -- ^ writer format pointer
|
100
|
+
-> CLong -- ^ writer format length
|
101
|
+
-> Ptr CChar -- ^ input pointer
|
102
|
+
-> CLong -- ^ input length
|
103
|
+
-> Ptr CChar -- ^ media extraction path pointer
|
104
|
+
-> CLong -- ^ media extraction path length
|
105
|
+
-> IO (Ptr (CInt, Ptr CChar, CLong)) -- ^ Pointer to @(success, output pointer, output length)@
|
106
|
+
convert_hs !readerStr !readerLen !writerStr !writerLen !input !inputLen !mediaBagStr !mediaBagLen = do
|
107
|
+
let readerStr' = (readerStr , fromEnum readerLen )
|
108
|
+
let writerStr' = (writerStr , fromEnum writerLen )
|
109
|
+
let input' = (input , fromEnum inputLen )
|
110
|
+
let mediaBagStr' = (mediaBagStr, fromEnum mediaBagLen)
|
111
|
+
let converted = convert readerOptions writerOptions readerStr' writerStr' input' mediaBagStr'
|
112
|
+
let EitherT safeConverted = timeoutEitherT (10 * {- minutes -} 60000000) converted
|
113
|
+
result <- bimap (second toEnum) (second toEnum) <$> safeConverted
|
114
|
+
let (success, (rstrPtr, rstrLen)) = either (0,) (1,) result
|
115
|
+
addr <- malloc
|
116
|
+
addr `poke` (success, rstrPtr, rstrLen)
|
117
|
+
return addr
|
118
|
+
|
119
|
+
|
120
|
+
-- | Extract the success value from a result pointer
|
121
|
+
result_success :: Ptr (CInt, Ptr CChar, CLong) -> IO CInt
|
122
|
+
result_success addr = do
|
123
|
+
(success, _, _) <- peek addr
|
124
|
+
return success
|
125
|
+
|
126
|
+
-- | Extract the pointer to the result string from a result pointer
|
127
|
+
result_ptr :: Ptr (CInt, Ptr CChar, CLong) -> IO (Ptr CChar)
|
128
|
+
result_ptr addr = do
|
129
|
+
(_, ptr, _) <- peek addr
|
130
|
+
return ptr
|
131
|
+
|
132
|
+
-- | Extract the length (in bytes) of the result string from a result pointer
|
133
|
+
result_len :: Ptr (CInt, Ptr CChar, CLong) -> IO CLong
|
134
|
+
result_len addr = do
|
135
|
+
(_, _, len) <- peek addr
|
136
|
+
return len
|
137
|
+
|
138
|
+
-- | Free a result pointer. Note: This only frees the struct that wraps the
|
139
|
+
-- result values, it does not free the result string's memory
|
140
|
+
free_result :: Ptr (CInt, Ptr CChar, CLong) -> IO ()
|
141
|
+
free_result addr = do
|
142
|
+
-- (_, ptr, _) <- peek addr -- this is commented out because the caller manages it
|
143
|
+
-- free ptr
|
144
|
+
free addr
|
145
|
+
return ()
|
146
|
+
|
147
|
+
|
@@ -0,0 +1,73 @@
|
|
1
|
+
{-# LANGUAGE BangPatterns #-}
|
2
|
+
{-# LANGUAGE DeriveGeneric #-}
|
3
|
+
{-# LANGUAGE FlexibleContexts #-}
|
4
|
+
{-# LANGUAGE ForeignFunctionInterface #-}
|
5
|
+
{-# LANGUAGE RankNTypes #-}
|
6
|
+
{-# LANGUAGE StandaloneDeriving #-}
|
7
|
+
{-# LANGUAGE TupleSections #-}
|
8
|
+
{-# LANGUAGE ScopedTypeVariables #-}
|
9
|
+
|
10
|
+
module Text.Pandoc.C.Types where
|
11
|
+
|
12
|
+
import Control.Monad.Trans.Either (EitherT(..))
|
13
|
+
import Data.Aeson (ToJSON(..), genericToEncoding, defaultOptions, genericToJSON, encode)
|
14
|
+
import Data.ByteString.Lazy (toStrict)
|
15
|
+
import Data.ByteString (useAsCStringLen)
|
16
|
+
import Foreign.C.String (CStringLen)
|
17
|
+
import Foreign.C.Types (CChar, CLong(..))
|
18
|
+
import Foreign.Ptr (Ptr)
|
19
|
+
import GHC.Generics (Generic)
|
20
|
+
import Text.Pandoc (ReaderOptions, Pandoc, WriterOptions(..), PandocError(..))
|
21
|
+
import Text.Pandoc.MediaBag (MediaBag)
|
22
|
+
import Text.Parsec.Error (ParseError, Message(..), errorPos, errorMessages)
|
23
|
+
import Text.Parsec.Pos (SourcePos, SourceName, Line, Column, sourceName, sourceLine, sourceColumn)
|
24
|
+
|
25
|
+
|
26
|
+
-- | The `Reader` type modified for foreign C exports
|
27
|
+
type CReader = ReaderOptions -> CStringLen -> EitherT CStringLen IO (Pandoc, MediaBag)
|
28
|
+
|
29
|
+
-- | The `Writer` type modified for foreign C exports
|
30
|
+
type CWriter = WriterOptions -> Pandoc -> EitherT CStringLen IO CStringLen
|
31
|
+
|
32
|
+
|
33
|
+
-- | A clone of `SourcePos`, to allow deriving `Generic`
|
34
|
+
data SourcePos' = SourcePos' SourceName !Line !Column deriving (Eq, Ord, Show, Generic)
|
35
|
+
|
36
|
+
instance ToJSON SourcePos' where
|
37
|
+
toEncoding = genericToEncoding defaultOptions
|
38
|
+
|
39
|
+
deriving instance Generic Message
|
40
|
+
|
41
|
+
instance ToJSON Message where
|
42
|
+
toEncoding = genericToEncoding defaultOptions
|
43
|
+
|
44
|
+
-- | A clone of `ParseError`, to allow deriving `Generic`
|
45
|
+
data ParseError' = ParseError' !SourcePos' [Message] deriving (Generic)
|
46
|
+
|
47
|
+
-- | Convert to our clone of `SourcePos`
|
48
|
+
sourcePos' :: SourcePos -> SourcePos'
|
49
|
+
sourcePos' !sp = SourcePos' (sourceName sp) (sourceLine sp) (sourceColumn sp)
|
50
|
+
|
51
|
+
-- | Convert to our clone of `ParseError`
|
52
|
+
parseError' :: ParseError -> ParseError'
|
53
|
+
parseError' !pe = ParseError' (sourcePos' (errorPos pe)) (errorMessages pe)
|
54
|
+
|
55
|
+
|
56
|
+
instance ToJSON ParseError where
|
57
|
+
toJSON = genericToJSON defaultOptions . parseError'
|
58
|
+
toEncoding = genericToEncoding defaultOptions . parseError'
|
59
|
+
|
60
|
+
instance ToJSON PandocError where
|
61
|
+
toEncoding = genericToEncoding defaultOptions
|
62
|
+
|
63
|
+
-- | Convert a `PandocError` to JSON
|
64
|
+
showPandocError :: PandocError -> IO CStringLen
|
65
|
+
showPandocError = flip useAsCStringLen return . toStrict . encode
|
66
|
+
|
67
|
+
|
68
|
+
-- | `CStringLen` for use by Ruby
|
69
|
+
type RStringLen = (Ptr CChar, CLong)
|
70
|
+
|
71
|
+
|
72
|
+
|
73
|
+
|
@@ -0,0 +1,78 @@
|
|
1
|
+
{-# LANGUAGE BangPatterns #-}
|
2
|
+
{-# LANGUAGE DeriveGeneric #-}
|
3
|
+
{-# LANGUAGE FlexibleContexts #-}
|
4
|
+
{-# LANGUAGE ForeignFunctionInterface #-}
|
5
|
+
{-# LANGUAGE RankNTypes #-}
|
6
|
+
{-# LANGUAGE StandaloneDeriving #-}
|
7
|
+
{-# LANGUAGE TupleSections #-}
|
8
|
+
{-# LANGUAGE ScopedTypeVariables #-}
|
9
|
+
|
10
|
+
module Text.Pandoc.C.Utils where
|
11
|
+
|
12
|
+
import Control.Concurrent
|
13
|
+
import Control.Monad.Trans.Class (lift)
|
14
|
+
import Control.Monad.Trans.Either (EitherT(..))
|
15
|
+
import Foreign.C.String (CStringLen, peekCStringLen, newCStringLen)
|
16
|
+
import System.Timeout (timeout)
|
17
|
+
import Text.Pandoc (Pandoc, Inline(..))
|
18
|
+
import Text.Pandoc.MediaBag (MediaBag, extractMediaBag, mediaDirectory)
|
19
|
+
import Text.Pandoc.Walk (walk)
|
20
|
+
|
21
|
+
|
22
|
+
-- | Apply a monadic action to the left of an `EitherT`
|
23
|
+
mapML :: Monad m => (a -> m b) -> EitherT a m c -> EitherT b m c
|
24
|
+
mapML !f !(EitherT m) = EitherT $ do
|
25
|
+
m' <- m
|
26
|
+
case m' of
|
27
|
+
( Left x) -> Left <$> f x
|
28
|
+
~(Right y) -> return $ Right y
|
29
|
+
|
30
|
+
-- | Apply a monadic action to the right of an `EitherT`
|
31
|
+
mapMR :: Monad m => (b -> m c) -> EitherT a m b -> EitherT a m c
|
32
|
+
mapMR !f !(EitherT m) = EitherT $ do
|
33
|
+
m' <- m
|
34
|
+
case m' of
|
35
|
+
( Left x) -> return $ Left x
|
36
|
+
~(Right y) -> Right <$> f y
|
37
|
+
|
38
|
+
|
39
|
+
-- | This function forks a thread to both catch exceptions and better kill infinite loops that can't be caught with `timeout` alone.
|
40
|
+
-- For example, @timeout 10 (forever $ return ())@ will never time out, but @timeoutEitherT 10 (lift $ forever $ return ())@ will.
|
41
|
+
--
|
42
|
+
-- timeoutEitherT is strict within the created thread
|
43
|
+
timeoutEitherT :: Int -> EitherT CStringLen IO b -> EitherT CStringLen IO b
|
44
|
+
timeoutEitherT !us !(EitherT io) = EitherT $ do
|
45
|
+
mvar <- newEmptyMVar
|
46
|
+
tid <- io `forkFinally` (putMVar mvar $!)
|
47
|
+
result <- timeout us (takeMVar mvar)
|
48
|
+
case result of
|
49
|
+
( Nothing ) -> killThread tid >> Left <$> newCStringLen ("Pandoc timed out after " ++ show us ++ "microseconds")
|
50
|
+
~(Just noTimeout) -> case noTimeout of
|
51
|
+
( Right successful) -> return successful
|
52
|
+
~(Left err ) -> Left <$> newCStringLen ("Pandoc internal error: " ++ show err)
|
53
|
+
|
54
|
+
|
55
|
+
-- | Taken from pandoc's executable, adjust the paths and extract the media to the given directory
|
56
|
+
-- https://github.com/jgm/pandoc/blob/9849ba7fd744f529f063e0802a18fa18c8433eeb/src/Text/Pandoc/Class.hs#L385
|
57
|
+
extractMedia :: MediaBag -> FilePath -> Pandoc -> IO Pandoc
|
58
|
+
extractMedia media dir d =
|
59
|
+
case [fp | (fp, _, _) <- mediaDirectory media] of
|
60
|
+
[] -> return d
|
61
|
+
fps -> do
|
62
|
+
extractMediaBag True dir media
|
63
|
+
return $ walk (adjustImagePath dir fps) d
|
64
|
+
|
65
|
+
-- | Also taken from pandoc's executable, adjust the image paths of inline elements
|
66
|
+
adjustImagePath :: FilePath -> [FilePath] -> Inline -> Inline
|
67
|
+
adjustImagePath dir paths (Image attr lab (src, tit))
|
68
|
+
| src `elem` paths = Image attr lab (dir ++ "/" ++ src, tit)
|
69
|
+
adjustImagePath _ _ x = x
|
70
|
+
|
71
|
+
-- | Extract the media bag to the given directory, unless that directory string is empty
|
72
|
+
extractCMediabag :: CStringLen -> MediaBag -> Pandoc -> EitherT CStringLen IO Pandoc
|
73
|
+
extractCMediabag !(_, 0) _ !pandoc = return pandoc
|
74
|
+
extractCMediabag !cPath !mediaBag !pandoc = do
|
75
|
+
nonEmptyPath <- lift $ peekCStringLen cPath
|
76
|
+
lift $ extractMedia mediaBag nonEmptyPath pandoc
|
77
|
+
|
78
|
+
|