pandoc_rb 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,74 @@
1
+ name: pandoc-rb
2
+ version: 0.1.0.0
3
+ synopsis: Call pandoc from ruby using the FFI
4
+ description: This package provides C exports made for use by Ruby for calling Pandoc
5
+ homepage: https://github.com/michaeljklein/pandoc_rb
6
+ license: BSD3
7
+ author: Michael Klein
8
+ maintainer: lambdamichael@gmail.com
9
+ category: FFI
10
+ build-type: Simple
11
+ cabal-version: >=1.10
12
+
13
+ executable PandocRb.dylib
14
+ main-is: PandocRb.hs
15
+ other-extensions: ForeignFunctionInterface
16
+ build-depends: base >= 4.7 && < 5
17
+ , pandoc-rb
18
+ , bytestring
19
+ , either
20
+ , pandoc
21
+ , pandoc-types
22
+ , storable-tuple
23
+ , transformers
24
+ , aeson
25
+ , parsec
26
+ hs-source-dirs: src
27
+ other-modules: Text.Pandoc.C
28
+ , Text.Pandoc.C.Types
29
+ , Text.Pandoc.C.Utils
30
+ default-language: Haskell2010
31
+ include-dirs: src
32
+ ghc-options: -O3 -shared -fPIC -dynamic -Wall
33
+ extra-libraries: HSrts-ghc8.0.2
34
+
35
+
36
+ library
37
+ hs-source-dirs: src
38
+ exposed-modules: Text.Pandoc.C
39
+ , Text.Pandoc.C.Types
40
+ , Text.Pandoc.C.Utils
41
+ build-depends: base >= 4.7 && < 5
42
+ , bytestring
43
+ , either
44
+ , pandoc == 1.19.2.1337
45
+ , pandoc-types
46
+ , storable-tuple
47
+ , transformers
48
+ , aeson
49
+ , parsec
50
+ ghc-options: -O3 -Wall
51
+ default-language: Haskell2010
52
+
53
+ test-suite pandoc-rb-test
54
+ type: detailed-0.9
55
+ hs-source-dirs: test
56
+ test-module: DetailedSpec
57
+ build-depends: base >= 4.7 && < 5
58
+ , Cabal >= 1.20.0
59
+ , QuickCheck
60
+ , bytestring
61
+ , either
62
+ , pandoc == 1.19.2.1337
63
+ , pandoc-types
64
+ , pandoc-rb
65
+ , storable-tuple
66
+ , transformers
67
+ ghc-options: -threaded -rtsopts -with-rtsopts=-N
68
+ default-language: Haskell2010
69
+
70
+ source-repository head
71
+ type: git
72
+ location: https://github.com/michaeljklein/pandoc_rb
73
+
74
+
@@ -0,0 +1,100 @@
1
+ #include <stdio.h>
2
+ #include "ruby.h"
3
+ #include "HsFFI.h"
4
+ #include "PandocRb_stub.h"
5
+
6
+ VALUE PandocRb = Qnil;
7
+
8
+ void Init_pandoc_rb();
9
+
10
+ // a global designating whether the haskell runtime has been initialized
11
+ int hs_inited = 0;
12
+
13
+ VALUE method_convert_init(VALUE self);
14
+ VALUE method_convert_exit(VALUE self);
15
+ VALUE method_convert_raw(VALUE self, VALUE in_format, VALUE out_format, VALUE input, VALUE extract_media_path);
16
+
17
+ void Init_pandoc_rb() {
18
+ PandocRb = rb_define_module("PandocRb");
19
+ rb_define_module_function(PandocRb, "convert_init", method_convert_init, 0);
20
+ rb_define_module_function(PandocRb, "convert_exit", method_convert_exit, 0);
21
+ rb_define_module_function(PandocRb, "convert_raw" , method_convert_raw , 4);
22
+ }
23
+
24
+ VALUE method_convert_init(VALUE self) {
25
+ if (hs_inited) {
26
+ printf("pandoc_rb warning: Called convert_init after init\n");
27
+ } else {
28
+ hs_init(NULL, NULL);
29
+ hs_inited = 1;
30
+ }
31
+ return Qnil;
32
+ }
33
+
34
+ VALUE method_convert_exit(VALUE self) {
35
+ if (hs_inited) {
36
+ hs_exit();
37
+ hs_inited = 0;
38
+ } else {
39
+ printf("pandoc_rb warning: Called convert_exit before init\n");
40
+ }
41
+ return Qnil;
42
+ }
43
+
44
+ // Only checks whether inputs are strings, has no defaults and niether inits nor exits
45
+ VALUE method_convert_raw(VALUE self, VALUE in_format, VALUE out_format, VALUE input, VALUE extract_media_path) {
46
+ char * in_format_ptr;
47
+ char * out_format_ptr;
48
+ char * input_ptr;
49
+ char * extract_media_path_ptr;
50
+ long in_format_len;
51
+ long out_format_len;
52
+ long input_len;
53
+ long extract_media_path_len;
54
+
55
+ void * output_ptr;
56
+
57
+ VALUE success;
58
+ VALUE result_str;
59
+ VALUE result_pair;
60
+
61
+ SafeStringValue(in_format);
62
+ SafeStringValue(out_format);
63
+ SafeStringValue(input);
64
+ SafeStringValue(extract_media_path);
65
+ in_format_ptr = StringValuePtr(in_format);
66
+ out_format_ptr = StringValuePtr(out_format);
67
+ input_ptr = StringValuePtr(input);
68
+ extract_media_path_ptr = StringValuePtr(extract_media_path);
69
+ in_format_len = RSTRING_LEN(in_format);
70
+ out_format_len = RSTRING_LEN(out_format);
71
+ input_len = RSTRING_LEN(input);
72
+ extract_media_path_len = RSTRING_LEN(extract_media_path);
73
+
74
+ output_ptr = convert_hs(in_format_ptr, in_format_len,
75
+ out_format_ptr, out_format_len,
76
+ input_ptr, input_len,
77
+ extract_media_path_ptr, extract_media_path_len);
78
+
79
+ RB_GC_GUARD(in_format);
80
+ RB_GC_GUARD(out_format);
81
+ RB_GC_GUARD(input);
82
+ RB_GC_GUARD(extract_media_path);
83
+
84
+ if (result_success(output_ptr)) {
85
+ success = Qtrue;
86
+ } else {
87
+ success = Qfalse;
88
+ }
89
+ result_str = rb_utf8_str_new(result_ptr(output_ptr), result_len(output_ptr));
90
+
91
+ free_result(output_ptr);
92
+
93
+ result_pair = rb_ary_new_from_args(2, success, result_str);
94
+
95
+ RB_GC_GUARD(success);
96
+ RB_GC_GUARD(result_str);
97
+
98
+ return result_pair;
99
+ }
100
+
@@ -0,0 +1,26 @@
1
+ {-# LANGUAGE BangPatterns #-}
2
+ {-# LANGUAGE DeriveGeneric #-}
3
+ {-# LANGUAGE FlexibleContexts #-}
4
+ {-# LANGUAGE ForeignFunctionInterface #-}
5
+ {-# LANGUAGE RankNTypes #-}
6
+ {-# LANGUAGE StandaloneDeriving #-}
7
+ {-# LANGUAGE TupleSections #-}
8
+ {-# LANGUAGE ScopedTypeVariables #-}
9
+
10
+ module PandocRb where
11
+
12
+ import Foreign.C.Types (CChar, CLong(..), CInt(..))
13
+ import Foreign.Ptr (Ptr)
14
+ import Text.Pandoc.C (convert_hs, result_success, result_ptr, result_len, free_result)
15
+
16
+
17
+ foreign export ccall convert_hs :: Ptr CChar -> CLong -> Ptr CChar -> CLong -> Ptr CChar -> CLong -> Ptr CChar -> CLong -> IO (Ptr (CInt, Ptr CChar, CLong))
18
+
19
+ foreign export ccall result_success :: Ptr (CInt, Ptr CChar, CLong) -> IO CInt
20
+
21
+ foreign export ccall result_ptr :: Ptr (CInt, Ptr CChar, CLong) -> IO (Ptr CChar)
22
+
23
+ foreign export ccall result_len :: Ptr (CInt, Ptr CChar, CLong) -> IO CLong
24
+
25
+ foreign export ccall free_result :: Ptr (CInt, Ptr CChar, CLong) -> IO ()
26
+
@@ -0,0 +1,147 @@
1
+ {-# LANGUAGE BangPatterns #-}
2
+ {-# LANGUAGE DeriveGeneric #-}
3
+ {-# LANGUAGE FlexibleContexts #-}
4
+ {-# LANGUAGE ForeignFunctionInterface #-}
5
+ {-# LANGUAGE RankNTypes #-}
6
+ {-# LANGUAGE StandaloneDeriving #-}
7
+ {-# LANGUAGE TupleSections #-}
8
+ {-# LANGUAGE ScopedTypeVariables #-}
9
+
10
+ module Text.Pandoc.C where
11
+
12
+ import Control.Monad.Trans.Class (lift)
13
+ import Control.Monad.Trans.Either (EitherT(..))
14
+ import Data.Bifunctor (bimap, second)
15
+ import Data.ByteString (useAsCStringLen)
16
+ import Data.ByteString.Lazy (toStrict, fromStrict)
17
+ import Data.ByteString.Unsafe (unsafePackCStringLen)
18
+ import Data.Monoid ((<>))
19
+ import Foreign.C.String (CStringLen, peekCStringLen, newCStringLen)
20
+ import Foreign.C.Types (CChar, CLong(..), CInt(..))
21
+ import Foreign.Marshal.Alloc (malloc, free)
22
+ import Foreign.Ptr (Ptr)
23
+ import Foreign.Storable (peek, poke)
24
+ import Foreign.Storable.Tuple ()
25
+ import Text.Pandoc (ReaderOptions, WriterOptions(..), Reader(..), Writer(..), HTMLMathMethod(..), writerMediaBag, getReader, getWriter, def)
26
+
27
+ import Text.Pandoc.C.Utils
28
+ import Text.Pandoc.C.Types
29
+
30
+
31
+ -- | Convert a `Reader` to a `CReader`
32
+ mkCReader :: Reader -> CReader
33
+ mkCReader !(StringReader reader) !opts !cstr = mapML showPandocError . EitherT $ do
34
+ str <- peekCStringLen cstr
35
+ readerResult <- reader opts str
36
+ return $ second (, mempty) $! readerResult
37
+ mkCReader ~(ByteStringReader reader) !opts str = mapML showPandocError . EitherT $ do
38
+ bs <- unsafePackCStringLen str
39
+ readerResult <- reader opts $! fromStrict bs
40
+ return readerResult
41
+
42
+
43
+ -- | Convert a `Writer` to a `CWriter`
44
+ mkCWriter :: Writer -> CWriter
45
+ mkCWriter !(PureStringWriter writer) !opts !pandoc = lift $ newCStringLen (writer opts pandoc)
46
+ mkCWriter !(IOStringWriter writer) !opts !pandoc = lift $ writer opts pandoc >>= newCStringLen
47
+ mkCWriter !(IOByteStringWriter writer) !opts !pandoc = lift $ writer opts pandoc >>= flip useAsCStringLen return . toStrict
48
+
49
+
50
+ -- | `getReader` for foreign C exports. Retrieve reader based on formatSpec (format+extensions).
51
+ getCReader :: CStringLen -> EitherT CStringLen IO CReader
52
+ getCReader !cstr = do
53
+ str <- lift . peekCStringLen $! cstr
54
+ EitherT $ case getReader str of
55
+ ( Left err ) -> Left <$> newCStringLen err
56
+ ~(Right reader) -> return . Right $! mkCReader reader
57
+
58
+ -- | `getWriter` for foreign C exports. Retrieve writer based on formatSpec (format+extensions).
59
+ getCWriter :: CStringLen -> EitherT CStringLen IO CWriter
60
+ getCWriter !cstr = do
61
+ str <- lift $ peekCStringLen cstr
62
+ EitherT $ case getWriter str of
63
+ ( Left err ) -> Left <$> newCStringLen err
64
+ ~(Right writer) -> return . Right $! mkCWriter writer
65
+
66
+
67
+ -- | The main conversion function.
68
+ -- Takes `ReaderOptions`, `WriterOptions`, reader's formatSpec,
69
+ -- writer's formatSpec, input and returns either an error string
70
+ -- or the output
71
+ convert :: ReaderOptions
72
+ -> WriterOptions
73
+ -> CStringLen -- ^ Reader specification
74
+ -> CStringLen -- ^ Writer specification
75
+ -> CStringLen -- ^ Input
76
+ -> CStringLen -- ^ Extract media dir
77
+ -> EitherT CStringLen IO CStringLen -- ^ Either an error string or the output
78
+ convert !readerOpts !writerOpts !readerStr !writerStr !input !mediaBagStr = do
79
+ reader <- getCReader $! readerStr
80
+ (pandoc, mediaBag) <- reader readerOpts $! input
81
+ pandoc' <- extractCMediabag mediaBagStr mediaBag pandoc
82
+ writer <- getCWriter $! writerStr
83
+ let writerOpts' = writerOpts { writerMediaBag = writerMediaBag writerOpts <> mediaBag }
84
+ writer writerOpts' pandoc'
85
+
86
+
87
+ -- | The `ReaderOptions` wired into the foreign export
88
+ readerOptions :: ReaderOptions
89
+ readerOptions = def
90
+
91
+ -- | The `WriterOptions` wired into the foreign export
92
+ writerOptions :: WriterOptions
93
+ writerOptions = def { writerHTMLMathMethod = MathJax "https://fail.cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/MathJax.js?config=TeX-AMS-MML_HTMLorMML" }
94
+
95
+
96
+ -- | Takes: reader, writer, input and returns (isSuccess, output pointer, output length)
97
+ convert_hs :: Ptr CChar -- ^ reader format pointer
98
+ -> CLong -- ^ reader format length
99
+ -> Ptr CChar -- ^ writer format pointer
100
+ -> CLong -- ^ writer format length
101
+ -> Ptr CChar -- ^ input pointer
102
+ -> CLong -- ^ input length
103
+ -> Ptr CChar -- ^ media extraction path pointer
104
+ -> CLong -- ^ media extraction path length
105
+ -> IO (Ptr (CInt, Ptr CChar, CLong)) -- ^ Pointer to @(success, output pointer, output length)@
106
+ convert_hs !readerStr !readerLen !writerStr !writerLen !input !inputLen !mediaBagStr !mediaBagLen = do
107
+ let readerStr' = (readerStr , fromEnum readerLen )
108
+ let writerStr' = (writerStr , fromEnum writerLen )
109
+ let input' = (input , fromEnum inputLen )
110
+ let mediaBagStr' = (mediaBagStr, fromEnum mediaBagLen)
111
+ let converted = convert readerOptions writerOptions readerStr' writerStr' input' mediaBagStr'
112
+ let EitherT safeConverted = timeoutEitherT (10 * {- minutes -} 60000000) converted
113
+ result <- bimap (second toEnum) (second toEnum) <$> safeConverted
114
+ let (success, (rstrPtr, rstrLen)) = either (0,) (1,) result
115
+ addr <- malloc
116
+ addr `poke` (success, rstrPtr, rstrLen)
117
+ return addr
118
+
119
+
120
+ -- | Extract the success value from a result pointer
121
+ result_success :: Ptr (CInt, Ptr CChar, CLong) -> IO CInt
122
+ result_success addr = do
123
+ (success, _, _) <- peek addr
124
+ return success
125
+
126
+ -- | Extract the pointer to the result string from a result pointer
127
+ result_ptr :: Ptr (CInt, Ptr CChar, CLong) -> IO (Ptr CChar)
128
+ result_ptr addr = do
129
+ (_, ptr, _) <- peek addr
130
+ return ptr
131
+
132
+ -- | Extract the length (in bytes) of the result string from a result pointer
133
+ result_len :: Ptr (CInt, Ptr CChar, CLong) -> IO CLong
134
+ result_len addr = do
135
+ (_, _, len) <- peek addr
136
+ return len
137
+
138
+ -- | Free a result pointer. Note: This only frees the struct that wraps the
139
+ -- result values, it does not free the result string's memory
140
+ free_result :: Ptr (CInt, Ptr CChar, CLong) -> IO ()
141
+ free_result addr = do
142
+ -- (_, ptr, _) <- peek addr -- this is commented out because the caller manages it
143
+ -- free ptr
144
+ free addr
145
+ return ()
146
+
147
+
@@ -0,0 +1,73 @@
1
+ {-# LANGUAGE BangPatterns #-}
2
+ {-# LANGUAGE DeriveGeneric #-}
3
+ {-# LANGUAGE FlexibleContexts #-}
4
+ {-# LANGUAGE ForeignFunctionInterface #-}
5
+ {-# LANGUAGE RankNTypes #-}
6
+ {-# LANGUAGE StandaloneDeriving #-}
7
+ {-# LANGUAGE TupleSections #-}
8
+ {-# LANGUAGE ScopedTypeVariables #-}
9
+
10
+ module Text.Pandoc.C.Types where
11
+
12
+ import Control.Monad.Trans.Either (EitherT(..))
13
+ import Data.Aeson (ToJSON(..), genericToEncoding, defaultOptions, genericToJSON, encode)
14
+ import Data.ByteString.Lazy (toStrict)
15
+ import Data.ByteString (useAsCStringLen)
16
+ import Foreign.C.String (CStringLen)
17
+ import Foreign.C.Types (CChar, CLong(..))
18
+ import Foreign.Ptr (Ptr)
19
+ import GHC.Generics (Generic)
20
+ import Text.Pandoc (ReaderOptions, Pandoc, WriterOptions(..), PandocError(..))
21
+ import Text.Pandoc.MediaBag (MediaBag)
22
+ import Text.Parsec.Error (ParseError, Message(..), errorPos, errorMessages)
23
+ import Text.Parsec.Pos (SourcePos, SourceName, Line, Column, sourceName, sourceLine, sourceColumn)
24
+
25
+
26
+ -- | The `Reader` type modified for foreign C exports
27
+ type CReader = ReaderOptions -> CStringLen -> EitherT CStringLen IO (Pandoc, MediaBag)
28
+
29
+ -- | The `Writer` type modified for foreign C exports
30
+ type CWriter = WriterOptions -> Pandoc -> EitherT CStringLen IO CStringLen
31
+
32
+
33
+ -- | A clone of `SourcePos`, to allow deriving `Generic`
34
+ data SourcePos' = SourcePos' SourceName !Line !Column deriving (Eq, Ord, Show, Generic)
35
+
36
+ instance ToJSON SourcePos' where
37
+ toEncoding = genericToEncoding defaultOptions
38
+
39
+ deriving instance Generic Message
40
+
41
+ instance ToJSON Message where
42
+ toEncoding = genericToEncoding defaultOptions
43
+
44
+ -- | A clone of `ParseError`, to allow deriving `Generic`
45
+ data ParseError' = ParseError' !SourcePos' [Message] deriving (Generic)
46
+
47
+ -- | Convert to our clone of `SourcePos`
48
+ sourcePos' :: SourcePos -> SourcePos'
49
+ sourcePos' !sp = SourcePos' (sourceName sp) (sourceLine sp) (sourceColumn sp)
50
+
51
+ -- | Convert to our clone of `ParseError`
52
+ parseError' :: ParseError -> ParseError'
53
+ parseError' !pe = ParseError' (sourcePos' (errorPos pe)) (errorMessages pe)
54
+
55
+
56
+ instance ToJSON ParseError where
57
+ toJSON = genericToJSON defaultOptions . parseError'
58
+ toEncoding = genericToEncoding defaultOptions . parseError'
59
+
60
+ instance ToJSON PandocError where
61
+ toEncoding = genericToEncoding defaultOptions
62
+
63
+ -- | Convert a `PandocError` to JSON
64
+ showPandocError :: PandocError -> IO CStringLen
65
+ showPandocError = flip useAsCStringLen return . toStrict . encode
66
+
67
+
68
+ -- | `CStringLen` for use by Ruby
69
+ type RStringLen = (Ptr CChar, CLong)
70
+
71
+
72
+
73
+
@@ -0,0 +1,78 @@
1
+ {-# LANGUAGE BangPatterns #-}
2
+ {-# LANGUAGE DeriveGeneric #-}
3
+ {-# LANGUAGE FlexibleContexts #-}
4
+ {-# LANGUAGE ForeignFunctionInterface #-}
5
+ {-# LANGUAGE RankNTypes #-}
6
+ {-# LANGUAGE StandaloneDeriving #-}
7
+ {-# LANGUAGE TupleSections #-}
8
+ {-# LANGUAGE ScopedTypeVariables #-}
9
+
10
+ module Text.Pandoc.C.Utils where
11
+
12
+ import Control.Concurrent
13
+ import Control.Monad.Trans.Class (lift)
14
+ import Control.Monad.Trans.Either (EitherT(..))
15
+ import Foreign.C.String (CStringLen, peekCStringLen, newCStringLen)
16
+ import System.Timeout (timeout)
17
+ import Text.Pandoc (Pandoc, Inline(..))
18
+ import Text.Pandoc.MediaBag (MediaBag, extractMediaBag, mediaDirectory)
19
+ import Text.Pandoc.Walk (walk)
20
+
21
+
22
+ -- | Apply a monadic action to the left of an `EitherT`
23
+ mapML :: Monad m => (a -> m b) -> EitherT a m c -> EitherT b m c
24
+ mapML !f !(EitherT m) = EitherT $ do
25
+ m' <- m
26
+ case m' of
27
+ ( Left x) -> Left <$> f x
28
+ ~(Right y) -> return $ Right y
29
+
30
+ -- | Apply a monadic action to the right of an `EitherT`
31
+ mapMR :: Monad m => (b -> m c) -> EitherT a m b -> EitherT a m c
32
+ mapMR !f !(EitherT m) = EitherT $ do
33
+ m' <- m
34
+ case m' of
35
+ ( Left x) -> return $ Left x
36
+ ~(Right y) -> Right <$> f y
37
+
38
+
39
+ -- | This function forks a thread to both catch exceptions and better kill infinite loops that can't be caught with `timeout` alone.
40
+ -- For example, @timeout 10 (forever $ return ())@ will never time out, but @timeoutEitherT 10 (lift $ forever $ return ())@ will.
41
+ --
42
+ -- timeoutEitherT is strict within the created thread
43
+ timeoutEitherT :: Int -> EitherT CStringLen IO b -> EitherT CStringLen IO b
44
+ timeoutEitherT !us !(EitherT io) = EitherT $ do
45
+ mvar <- newEmptyMVar
46
+ tid <- io `forkFinally` (putMVar mvar $!)
47
+ result <- timeout us (takeMVar mvar)
48
+ case result of
49
+ ( Nothing ) -> killThread tid >> Left <$> newCStringLen ("Pandoc timed out after " ++ show us ++ "microseconds")
50
+ ~(Just noTimeout) -> case noTimeout of
51
+ ( Right successful) -> return successful
52
+ ~(Left err ) -> Left <$> newCStringLen ("Pandoc internal error: " ++ show err)
53
+
54
+
55
+ -- | Taken from pandoc's executable, adjust the paths and extract the media to the given directory
56
+ -- https://github.com/jgm/pandoc/blob/9849ba7fd744f529f063e0802a18fa18c8433eeb/src/Text/Pandoc/Class.hs#L385
57
+ extractMedia :: MediaBag -> FilePath -> Pandoc -> IO Pandoc
58
+ extractMedia media dir d =
59
+ case [fp | (fp, _, _) <- mediaDirectory media] of
60
+ [] -> return d
61
+ fps -> do
62
+ extractMediaBag True dir media
63
+ return $ walk (adjustImagePath dir fps) d
64
+
65
+ -- | Also taken from pandoc's executable, adjust the image paths of inline elements
66
+ adjustImagePath :: FilePath -> [FilePath] -> Inline -> Inline
67
+ adjustImagePath dir paths (Image attr lab (src, tit))
68
+ | src `elem` paths = Image attr lab (dir ++ "/" ++ src, tit)
69
+ adjustImagePath _ _ x = x
70
+
71
+ -- | Extract the media bag to the given directory, unless that directory string is empty
72
+ extractCMediabag :: CStringLen -> MediaBag -> Pandoc -> EitherT CStringLen IO Pandoc
73
+ extractCMediabag !(_, 0) _ !pandoc = return pandoc
74
+ extractCMediabag !cPath !mediaBag !pandoc = do
75
+ nonEmptyPath <- lift $ peekCStringLen cPath
76
+ lift $ extractMedia mediaBag nonEmptyPath pandoc
77
+
78
+