pandoc_rb 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,74 @@
1
+ name: pandoc-rb
2
+ version: 0.1.0.0
3
+ synopsis: Call pandoc from ruby using the FFI
4
+ description: This package provides C exports made for use by Ruby for calling Pandoc
5
+ homepage: https://github.com/michaeljklein/pandoc_rb
6
+ license: BSD3
7
+ author: Michael Klein
8
+ maintainer: lambdamichael@gmail.com
9
+ category: FFI
10
+ build-type: Simple
11
+ cabal-version: >=1.10
12
+
13
+ executable PandocRb.dylib
14
+ main-is: PandocRb.hs
15
+ other-extensions: ForeignFunctionInterface
16
+ build-depends: base >= 4.7 && < 5
17
+ , pandoc-rb
18
+ , bytestring
19
+ , either
20
+ , pandoc
21
+ , pandoc-types
22
+ , storable-tuple
23
+ , transformers
24
+ , aeson
25
+ , parsec
26
+ hs-source-dirs: src
27
+ other-modules: Text.Pandoc.C
28
+ , Text.Pandoc.C.Types
29
+ , Text.Pandoc.C.Utils
30
+ default-language: Haskell2010
31
+ include-dirs: src
32
+ ghc-options: -O3 -shared -fPIC -dynamic -Wall
33
+ extra-libraries: HSrts-ghc8.0.2
34
+
35
+
36
+ library
37
+ hs-source-dirs: src
38
+ exposed-modules: Text.Pandoc.C
39
+ , Text.Pandoc.C.Types
40
+ , Text.Pandoc.C.Utils
41
+ build-depends: base >= 4.7 && < 5
42
+ , bytestring
43
+ , either
44
+ , pandoc == 1.19.2.1337
45
+ , pandoc-types
46
+ , storable-tuple
47
+ , transformers
48
+ , aeson
49
+ , parsec
50
+ ghc-options: -O3 -Wall
51
+ default-language: Haskell2010
52
+
53
+ test-suite pandoc-rb-test
54
+ type: detailed-0.9
55
+ hs-source-dirs: test
56
+ test-module: DetailedSpec
57
+ build-depends: base >= 4.7 && < 5
58
+ , Cabal >= 1.20.0
59
+ , QuickCheck
60
+ , bytestring
61
+ , either
62
+ , pandoc == 1.19.2.1337
63
+ , pandoc-types
64
+ , pandoc-rb
65
+ , storable-tuple
66
+ , transformers
67
+ ghc-options: -threaded -rtsopts -with-rtsopts=-N
68
+ default-language: Haskell2010
69
+
70
+ source-repository head
71
+ type: git
72
+ location: https://github.com/michaeljklein/pandoc_rb
73
+
74
+
@@ -0,0 +1,100 @@
1
+ #include <stdio.h>
2
+ #include "ruby.h"
3
+ #include "HsFFI.h"
4
+ #include "PandocRb_stub.h"
5
+
6
+ VALUE PandocRb = Qnil;
7
+
8
+ void Init_pandoc_rb();
9
+
10
+ // a global designating whether the haskell runtime has been initialized
11
+ int hs_inited = 0;
12
+
13
+ VALUE method_convert_init(VALUE self);
14
+ VALUE method_convert_exit(VALUE self);
15
+ VALUE method_convert_raw(VALUE self, VALUE in_format, VALUE out_format, VALUE input, VALUE extract_media_path);
16
+
17
+ void Init_pandoc_rb() {
18
+ PandocRb = rb_define_module("PandocRb");
19
+ rb_define_module_function(PandocRb, "convert_init", method_convert_init, 0);
20
+ rb_define_module_function(PandocRb, "convert_exit", method_convert_exit, 0);
21
+ rb_define_module_function(PandocRb, "convert_raw" , method_convert_raw , 4);
22
+ }
23
+
24
+ VALUE method_convert_init(VALUE self) {
25
+ if (hs_inited) {
26
+ printf("pandoc_rb warning: Called convert_init after init\n");
27
+ } else {
28
+ hs_init(NULL, NULL);
29
+ hs_inited = 1;
30
+ }
31
+ return Qnil;
32
+ }
33
+
34
+ VALUE method_convert_exit(VALUE self) {
35
+ if (hs_inited) {
36
+ hs_exit();
37
+ hs_inited = 0;
38
+ } else {
39
+ printf("pandoc_rb warning: Called convert_exit before init\n");
40
+ }
41
+ return Qnil;
42
+ }
43
+
44
+ // Only checks whether inputs are strings, has no defaults and niether inits nor exits
45
+ VALUE method_convert_raw(VALUE self, VALUE in_format, VALUE out_format, VALUE input, VALUE extract_media_path) {
46
+ char * in_format_ptr;
47
+ char * out_format_ptr;
48
+ char * input_ptr;
49
+ char * extract_media_path_ptr;
50
+ long in_format_len;
51
+ long out_format_len;
52
+ long input_len;
53
+ long extract_media_path_len;
54
+
55
+ void * output_ptr;
56
+
57
+ VALUE success;
58
+ VALUE result_str;
59
+ VALUE result_pair;
60
+
61
+ SafeStringValue(in_format);
62
+ SafeStringValue(out_format);
63
+ SafeStringValue(input);
64
+ SafeStringValue(extract_media_path);
65
+ in_format_ptr = StringValuePtr(in_format);
66
+ out_format_ptr = StringValuePtr(out_format);
67
+ input_ptr = StringValuePtr(input);
68
+ extract_media_path_ptr = StringValuePtr(extract_media_path);
69
+ in_format_len = RSTRING_LEN(in_format);
70
+ out_format_len = RSTRING_LEN(out_format);
71
+ input_len = RSTRING_LEN(input);
72
+ extract_media_path_len = RSTRING_LEN(extract_media_path);
73
+
74
+ output_ptr = convert_hs(in_format_ptr, in_format_len,
75
+ out_format_ptr, out_format_len,
76
+ input_ptr, input_len,
77
+ extract_media_path_ptr, extract_media_path_len);
78
+
79
+ RB_GC_GUARD(in_format);
80
+ RB_GC_GUARD(out_format);
81
+ RB_GC_GUARD(input);
82
+ RB_GC_GUARD(extract_media_path);
83
+
84
+ if (result_success(output_ptr)) {
85
+ success = Qtrue;
86
+ } else {
87
+ success = Qfalse;
88
+ }
89
+ result_str = rb_utf8_str_new(result_ptr(output_ptr), result_len(output_ptr));
90
+
91
+ free_result(output_ptr);
92
+
93
+ result_pair = rb_ary_new_from_args(2, success, result_str);
94
+
95
+ RB_GC_GUARD(success);
96
+ RB_GC_GUARD(result_str);
97
+
98
+ return result_pair;
99
+ }
100
+
@@ -0,0 +1,26 @@
1
+ {-# LANGUAGE BangPatterns #-}
2
+ {-# LANGUAGE DeriveGeneric #-}
3
+ {-# LANGUAGE FlexibleContexts #-}
4
+ {-# LANGUAGE ForeignFunctionInterface #-}
5
+ {-# LANGUAGE RankNTypes #-}
6
+ {-# LANGUAGE StandaloneDeriving #-}
7
+ {-# LANGUAGE TupleSections #-}
8
+ {-# LANGUAGE ScopedTypeVariables #-}
9
+
10
+ module PandocRb where
11
+
12
+ import Foreign.C.Types (CChar, CLong(..), CInt(..))
13
+ import Foreign.Ptr (Ptr)
14
+ import Text.Pandoc.C (convert_hs, result_success, result_ptr, result_len, free_result)
15
+
16
+
17
+ foreign export ccall convert_hs :: Ptr CChar -> CLong -> Ptr CChar -> CLong -> Ptr CChar -> CLong -> Ptr CChar -> CLong -> IO (Ptr (CInt, Ptr CChar, CLong))
18
+
19
+ foreign export ccall result_success :: Ptr (CInt, Ptr CChar, CLong) -> IO CInt
20
+
21
+ foreign export ccall result_ptr :: Ptr (CInt, Ptr CChar, CLong) -> IO (Ptr CChar)
22
+
23
+ foreign export ccall result_len :: Ptr (CInt, Ptr CChar, CLong) -> IO CLong
24
+
25
+ foreign export ccall free_result :: Ptr (CInt, Ptr CChar, CLong) -> IO ()
26
+
@@ -0,0 +1,147 @@
1
+ {-# LANGUAGE BangPatterns #-}
2
+ {-# LANGUAGE DeriveGeneric #-}
3
+ {-# LANGUAGE FlexibleContexts #-}
4
+ {-# LANGUAGE ForeignFunctionInterface #-}
5
+ {-# LANGUAGE RankNTypes #-}
6
+ {-# LANGUAGE StandaloneDeriving #-}
7
+ {-# LANGUAGE TupleSections #-}
8
+ {-# LANGUAGE ScopedTypeVariables #-}
9
+
10
+ module Text.Pandoc.C where
11
+
12
+ import Control.Monad.Trans.Class (lift)
13
+ import Control.Monad.Trans.Either (EitherT(..))
14
+ import Data.Bifunctor (bimap, second)
15
+ import Data.ByteString (useAsCStringLen)
16
+ import Data.ByteString.Lazy (toStrict, fromStrict)
17
+ import Data.ByteString.Unsafe (unsafePackCStringLen)
18
+ import Data.Monoid ((<>))
19
+ import Foreign.C.String (CStringLen, peekCStringLen, newCStringLen)
20
+ import Foreign.C.Types (CChar, CLong(..), CInt(..))
21
+ import Foreign.Marshal.Alloc (malloc, free)
22
+ import Foreign.Ptr (Ptr)
23
+ import Foreign.Storable (peek, poke)
24
+ import Foreign.Storable.Tuple ()
25
+ import Text.Pandoc (ReaderOptions, WriterOptions(..), Reader(..), Writer(..), HTMLMathMethod(..), writerMediaBag, getReader, getWriter, def)
26
+
27
+ import Text.Pandoc.C.Utils
28
+ import Text.Pandoc.C.Types
29
+
30
+
31
+ -- | Convert a `Reader` to a `CReader`
32
+ mkCReader :: Reader -> CReader
33
+ mkCReader !(StringReader reader) !opts !cstr = mapML showPandocError . EitherT $ do
34
+ str <- peekCStringLen cstr
35
+ readerResult <- reader opts str
36
+ return $ second (, mempty) $! readerResult
37
+ mkCReader ~(ByteStringReader reader) !opts str = mapML showPandocError . EitherT $ do
38
+ bs <- unsafePackCStringLen str
39
+ readerResult <- reader opts $! fromStrict bs
40
+ return readerResult
41
+
42
+
43
+ -- | Convert a `Writer` to a `CWriter`
44
+ mkCWriter :: Writer -> CWriter
45
+ mkCWriter !(PureStringWriter writer) !opts !pandoc = lift $ newCStringLen (writer opts pandoc)
46
+ mkCWriter !(IOStringWriter writer) !opts !pandoc = lift $ writer opts pandoc >>= newCStringLen
47
+ mkCWriter !(IOByteStringWriter writer) !opts !pandoc = lift $ writer opts pandoc >>= flip useAsCStringLen return . toStrict
48
+
49
+
50
+ -- | `getReader` for foreign C exports. Retrieve reader based on formatSpec (format+extensions).
51
+ getCReader :: CStringLen -> EitherT CStringLen IO CReader
52
+ getCReader !cstr = do
53
+ str <- lift . peekCStringLen $! cstr
54
+ EitherT $ case getReader str of
55
+ ( Left err ) -> Left <$> newCStringLen err
56
+ ~(Right reader) -> return . Right $! mkCReader reader
57
+
58
+ -- | `getWriter` for foreign C exports. Retrieve writer based on formatSpec (format+extensions).
59
+ getCWriter :: CStringLen -> EitherT CStringLen IO CWriter
60
+ getCWriter !cstr = do
61
+ str <- lift $ peekCStringLen cstr
62
+ EitherT $ case getWriter str of
63
+ ( Left err ) -> Left <$> newCStringLen err
64
+ ~(Right writer) -> return . Right $! mkCWriter writer
65
+
66
+
67
+ -- | The main conversion function.
68
+ -- Takes `ReaderOptions`, `WriterOptions`, reader's formatSpec,
69
+ -- writer's formatSpec, input and returns either an error string
70
+ -- or the output
71
+ convert :: ReaderOptions
72
+ -> WriterOptions
73
+ -> CStringLen -- ^ Reader specification
74
+ -> CStringLen -- ^ Writer specification
75
+ -> CStringLen -- ^ Input
76
+ -> CStringLen -- ^ Extract media dir
77
+ -> EitherT CStringLen IO CStringLen -- ^ Either an error string or the output
78
+ convert !readerOpts !writerOpts !readerStr !writerStr !input !mediaBagStr = do
79
+ reader <- getCReader $! readerStr
80
+ (pandoc, mediaBag) <- reader readerOpts $! input
81
+ pandoc' <- extractCMediabag mediaBagStr mediaBag pandoc
82
+ writer <- getCWriter $! writerStr
83
+ let writerOpts' = writerOpts { writerMediaBag = writerMediaBag writerOpts <> mediaBag }
84
+ writer writerOpts' pandoc'
85
+
86
+
87
+ -- | The `ReaderOptions` wired into the foreign export
88
+ readerOptions :: ReaderOptions
89
+ readerOptions = def
90
+
91
+ -- | The `WriterOptions` wired into the foreign export
92
+ writerOptions :: WriterOptions
93
+ writerOptions = def { writerHTMLMathMethod = MathJax "https://fail.cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/MathJax.js?config=TeX-AMS-MML_HTMLorMML" }
94
+
95
+
96
+ -- | Takes: reader, writer, input and returns (isSuccess, output pointer, output length)
97
+ convert_hs :: Ptr CChar -- ^ reader format pointer
98
+ -> CLong -- ^ reader format length
99
+ -> Ptr CChar -- ^ writer format pointer
100
+ -> CLong -- ^ writer format length
101
+ -> Ptr CChar -- ^ input pointer
102
+ -> CLong -- ^ input length
103
+ -> Ptr CChar -- ^ media extraction path pointer
104
+ -> CLong -- ^ media extraction path length
105
+ -> IO (Ptr (CInt, Ptr CChar, CLong)) -- ^ Pointer to @(success, output pointer, output length)@
106
+ convert_hs !readerStr !readerLen !writerStr !writerLen !input !inputLen !mediaBagStr !mediaBagLen = do
107
+ let readerStr' = (readerStr , fromEnum readerLen )
108
+ let writerStr' = (writerStr , fromEnum writerLen )
109
+ let input' = (input , fromEnum inputLen )
110
+ let mediaBagStr' = (mediaBagStr, fromEnum mediaBagLen)
111
+ let converted = convert readerOptions writerOptions readerStr' writerStr' input' mediaBagStr'
112
+ let EitherT safeConverted = timeoutEitherT (10 * {- minutes -} 60000000) converted
113
+ result <- bimap (second toEnum) (second toEnum) <$> safeConverted
114
+ let (success, (rstrPtr, rstrLen)) = either (0,) (1,) result
115
+ addr <- malloc
116
+ addr `poke` (success, rstrPtr, rstrLen)
117
+ return addr
118
+
119
+
120
+ -- | Extract the success value from a result pointer
121
+ result_success :: Ptr (CInt, Ptr CChar, CLong) -> IO CInt
122
+ result_success addr = do
123
+ (success, _, _) <- peek addr
124
+ return success
125
+
126
+ -- | Extract the pointer to the result string from a result pointer
127
+ result_ptr :: Ptr (CInt, Ptr CChar, CLong) -> IO (Ptr CChar)
128
+ result_ptr addr = do
129
+ (_, ptr, _) <- peek addr
130
+ return ptr
131
+
132
+ -- | Extract the length (in bytes) of the result string from a result pointer
133
+ result_len :: Ptr (CInt, Ptr CChar, CLong) -> IO CLong
134
+ result_len addr = do
135
+ (_, _, len) <- peek addr
136
+ return len
137
+
138
+ -- | Free a result pointer. Note: This only frees the struct that wraps the
139
+ -- result values, it does not free the result string's memory
140
+ free_result :: Ptr (CInt, Ptr CChar, CLong) -> IO ()
141
+ free_result addr = do
142
+ -- (_, ptr, _) <- peek addr -- this is commented out because the caller manages it
143
+ -- free ptr
144
+ free addr
145
+ return ()
146
+
147
+
@@ -0,0 +1,73 @@
1
+ {-# LANGUAGE BangPatterns #-}
2
+ {-# LANGUAGE DeriveGeneric #-}
3
+ {-# LANGUAGE FlexibleContexts #-}
4
+ {-# LANGUAGE ForeignFunctionInterface #-}
5
+ {-# LANGUAGE RankNTypes #-}
6
+ {-# LANGUAGE StandaloneDeriving #-}
7
+ {-# LANGUAGE TupleSections #-}
8
+ {-# LANGUAGE ScopedTypeVariables #-}
9
+
10
+ module Text.Pandoc.C.Types where
11
+
12
+ import Control.Monad.Trans.Either (EitherT(..))
13
+ import Data.Aeson (ToJSON(..), genericToEncoding, defaultOptions, genericToJSON, encode)
14
+ import Data.ByteString.Lazy (toStrict)
15
+ import Data.ByteString (useAsCStringLen)
16
+ import Foreign.C.String (CStringLen)
17
+ import Foreign.C.Types (CChar, CLong(..))
18
+ import Foreign.Ptr (Ptr)
19
+ import GHC.Generics (Generic)
20
+ import Text.Pandoc (ReaderOptions, Pandoc, WriterOptions(..), PandocError(..))
21
+ import Text.Pandoc.MediaBag (MediaBag)
22
+ import Text.Parsec.Error (ParseError, Message(..), errorPos, errorMessages)
23
+ import Text.Parsec.Pos (SourcePos, SourceName, Line, Column, sourceName, sourceLine, sourceColumn)
24
+
25
+
26
+ -- | The `Reader` type modified for foreign C exports
27
+ type CReader = ReaderOptions -> CStringLen -> EitherT CStringLen IO (Pandoc, MediaBag)
28
+
29
+ -- | The `Writer` type modified for foreign C exports
30
+ type CWriter = WriterOptions -> Pandoc -> EitherT CStringLen IO CStringLen
31
+
32
+
33
+ -- | A clone of `SourcePos`, to allow deriving `Generic`
34
+ data SourcePos' = SourcePos' SourceName !Line !Column deriving (Eq, Ord, Show, Generic)
35
+
36
+ instance ToJSON SourcePos' where
37
+ toEncoding = genericToEncoding defaultOptions
38
+
39
+ deriving instance Generic Message
40
+
41
+ instance ToJSON Message where
42
+ toEncoding = genericToEncoding defaultOptions
43
+
44
+ -- | A clone of `ParseError`, to allow deriving `Generic`
45
+ data ParseError' = ParseError' !SourcePos' [Message] deriving (Generic)
46
+
47
+ -- | Convert to our clone of `SourcePos`
48
+ sourcePos' :: SourcePos -> SourcePos'
49
+ sourcePos' !sp = SourcePos' (sourceName sp) (sourceLine sp) (sourceColumn sp)
50
+
51
+ -- | Convert to our clone of `ParseError`
52
+ parseError' :: ParseError -> ParseError'
53
+ parseError' !pe = ParseError' (sourcePos' (errorPos pe)) (errorMessages pe)
54
+
55
+
56
+ instance ToJSON ParseError where
57
+ toJSON = genericToJSON defaultOptions . parseError'
58
+ toEncoding = genericToEncoding defaultOptions . parseError'
59
+
60
+ instance ToJSON PandocError where
61
+ toEncoding = genericToEncoding defaultOptions
62
+
63
+ -- | Convert a `PandocError` to JSON
64
+ showPandocError :: PandocError -> IO CStringLen
65
+ showPandocError = flip useAsCStringLen return . toStrict . encode
66
+
67
+
68
+ -- | `CStringLen` for use by Ruby
69
+ type RStringLen = (Ptr CChar, CLong)
70
+
71
+
72
+
73
+
@@ -0,0 +1,78 @@
1
+ {-# LANGUAGE BangPatterns #-}
2
+ {-# LANGUAGE DeriveGeneric #-}
3
+ {-# LANGUAGE FlexibleContexts #-}
4
+ {-# LANGUAGE ForeignFunctionInterface #-}
5
+ {-# LANGUAGE RankNTypes #-}
6
+ {-# LANGUAGE StandaloneDeriving #-}
7
+ {-# LANGUAGE TupleSections #-}
8
+ {-# LANGUAGE ScopedTypeVariables #-}
9
+
10
+ module Text.Pandoc.C.Utils where
11
+
12
+ import Control.Concurrent
13
+ import Control.Monad.Trans.Class (lift)
14
+ import Control.Monad.Trans.Either (EitherT(..))
15
+ import Foreign.C.String (CStringLen, peekCStringLen, newCStringLen)
16
+ import System.Timeout (timeout)
17
+ import Text.Pandoc (Pandoc, Inline(..))
18
+ import Text.Pandoc.MediaBag (MediaBag, extractMediaBag, mediaDirectory)
19
+ import Text.Pandoc.Walk (walk)
20
+
21
+
22
+ -- | Apply a monadic action to the left of an `EitherT`
23
+ mapML :: Monad m => (a -> m b) -> EitherT a m c -> EitherT b m c
24
+ mapML !f !(EitherT m) = EitherT $ do
25
+ m' <- m
26
+ case m' of
27
+ ( Left x) -> Left <$> f x
28
+ ~(Right y) -> return $ Right y
29
+
30
+ -- | Apply a monadic action to the right of an `EitherT`
31
+ mapMR :: Monad m => (b -> m c) -> EitherT a m b -> EitherT a m c
32
+ mapMR !f !(EitherT m) = EitherT $ do
33
+ m' <- m
34
+ case m' of
35
+ ( Left x) -> return $ Left x
36
+ ~(Right y) -> Right <$> f y
37
+
38
+
39
+ -- | This function forks a thread to both catch exceptions and better kill infinite loops that can't be caught with `timeout` alone.
40
+ -- For example, @timeout 10 (forever $ return ())@ will never time out, but @timeoutEitherT 10 (lift $ forever $ return ())@ will.
41
+ --
42
+ -- timeoutEitherT is strict within the created thread
43
+ timeoutEitherT :: Int -> EitherT CStringLen IO b -> EitherT CStringLen IO b
44
+ timeoutEitherT !us !(EitherT io) = EitherT $ do
45
+ mvar <- newEmptyMVar
46
+ tid <- io `forkFinally` (putMVar mvar $!)
47
+ result <- timeout us (takeMVar mvar)
48
+ case result of
49
+ ( Nothing ) -> killThread tid >> Left <$> newCStringLen ("Pandoc timed out after " ++ show us ++ "microseconds")
50
+ ~(Just noTimeout) -> case noTimeout of
51
+ ( Right successful) -> return successful
52
+ ~(Left err ) -> Left <$> newCStringLen ("Pandoc internal error: " ++ show err)
53
+
54
+
55
+ -- | Taken from pandoc's executable, adjust the paths and extract the media to the given directory
56
+ -- https://github.com/jgm/pandoc/blob/9849ba7fd744f529f063e0802a18fa18c8433eeb/src/Text/Pandoc/Class.hs#L385
57
+ extractMedia :: MediaBag -> FilePath -> Pandoc -> IO Pandoc
58
+ extractMedia media dir d =
59
+ case [fp | (fp, _, _) <- mediaDirectory media] of
60
+ [] -> return d
61
+ fps -> do
62
+ extractMediaBag True dir media
63
+ return $ walk (adjustImagePath dir fps) d
64
+
65
+ -- | Also taken from pandoc's executable, adjust the image paths of inline elements
66
+ adjustImagePath :: FilePath -> [FilePath] -> Inline -> Inline
67
+ adjustImagePath dir paths (Image attr lab (src, tit))
68
+ | src `elem` paths = Image attr lab (dir ++ "/" ++ src, tit)
69
+ adjustImagePath _ _ x = x
70
+
71
+ -- | Extract the media bag to the given directory, unless that directory string is empty
72
+ extractCMediabag :: CStringLen -> MediaBag -> Pandoc -> EitherT CStringLen IO Pandoc
73
+ extractCMediabag !(_, 0) _ !pandoc = return pandoc
74
+ extractCMediabag !cPath !mediaBag !pandoc = do
75
+ nonEmptyPath <- lift $ peekCStringLen cPath
76
+ lift $ extractMedia mediaBag nonEmptyPath pandoc
77
+
78
+