pygments.rb 0.2.8 → 0.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +10 -1
- data/lib/pygments/version.rb +1 -1
- data/vendor/custom_lexers/github.py +330 -0
- data/vendor/pygments-main/AUTHORS +1 -0
- data/vendor/pygments-main/CHANGES +11 -0
- data/vendor/pygments-main/REVISION +1 -1
- data/vendor/pygments-main/pygments/lexers/_mapping.py +6 -0
- data/vendor/pygments-main/pygments/lexers/functional.py +6 -8
- data/vendor/pygments-main/pygments/lexers/github.py +330 -0
- data/vendor/pygments-main/pygments/lexers/math.py +158 -3
- data/vendor/pygments-main/pygments/lexers/text.py +1 -1
- data/vendor/pygments-main/pygments/lexers/web.py +11 -8
- data/vendor/pygments-main/tests/examplefiles/AcidStateAdvanced.hs +209 -0
- data/vendor/pygments-main/tests/examplefiles/string.jl +1031 -0
- data/vendor/pygments-main/tests/examplefiles/test.css +5 -0
- data/vendor/pygments-main/tests/examplefiles/test.xqy +3 -1
- data/vendor/pygments-main/tests/test_basic_api.py +23 -11
- data/vendor/pygments-main/tests/test_cmdline.py +25 -21
- data/vendor/pygments-main/tests/test_examplefiles.py +5 -1
- data/vendor/pygments-main/tests/test_html_formatter.py +24 -16
- data/vendor/pygments-main/tests/test_latex_formatter.py +13 -5
- data/vendor/pygments-main/tests/test_regexlexer.py +1 -1
- data/vendor/pygments-main/tests/test_token.py +7 -10
- data/vendor/pygments-main/tests/test_using_api.py +1 -1
- data/vendor/pygments-main/tests/test_util.py +26 -27
- metadata +9 -7
@@ -1718,7 +1718,7 @@ class PyPyLogLexer(RegexLexer):
|
|
1718
1718
|
r"int_xor|int_eq|int_ne|int_ge|int_gt|int_le|int_lt|int_is_zero|"
|
1719
1719
|
r"int_is_true|"
|
1720
1720
|
r"uint_floordiv|uint_ge|uint_lt|"
|
1721
|
-
r"float_add|float_sub|float_mul|float_truediv|"
|
1721
|
+
r"float_add|float_sub|float_mul|float_truediv|float_neg|"
|
1722
1722
|
r"float_eq|float_ne|float_ge|float_gt|float_le|float_lt|float_abs|"
|
1723
1723
|
r"ptr_eq|ptr_ne|instance_ptr_eq|instance_ptr_ne|"
|
1724
1724
|
r"cast_int_to_float|cast_float_to_int|"
|
@@ -456,7 +456,7 @@ class CssLexer(RegexLexer):
|
|
456
456
|
(r'[\[\]();]+', Punctuation),
|
457
457
|
(r'"(\\\\|\\"|[^"])*"', String.Double),
|
458
458
|
(r"'(\\\\|\\'|[^'])*'", String.Single),
|
459
|
-
(r'[a-zA-Z_][a-zA-Z0-9_]
|
459
|
+
(r'[a-zA-Z_][a-zA-Z0-9_]*', Name)
|
460
460
|
]
|
461
461
|
}
|
462
462
|
|
@@ -2651,12 +2651,14 @@ class XQueryLexer(ExtendedRegexLexer):
|
|
2651
2651
|
(r'(\.\d+)[eE][\+\-]?\d+', Number.Double, 'operator'),
|
2652
2652
|
(r'(\.\d+|\d+\.\d*)', Number, 'operator'),
|
2653
2653
|
(r'(\d+)', Number.Integer, 'operator'),
|
2654
|
-
(r'(\.\.|\.|\)
|
2654
|
+
(r'(\.\.|\.|\))', Punctuation, 'operator'),
|
2655
2655
|
(r'(declare)(\s+)(construction)',
|
2656
2656
|
bygroups(Keyword, Text, Keyword), 'operator'),
|
2657
2657
|
(r'(declare)(\s+)(default)(\s+)(order)',
|
2658
2658
|
bygroups(Keyword, Text, Keyword, Text, Keyword), 'operator'),
|
2659
2659
|
(ncname + ':\*', Name, 'operator'),
|
2660
|
+
('\*:'+ncname, Name.Tag, 'operator'),
|
2661
|
+
('\*', Name.Tag, 'operator'),
|
2660
2662
|
(stringdouble, String.Double, 'operator'),
|
2661
2663
|
(stringsingle, String.Single, 'operator'),
|
2662
2664
|
|
@@ -2767,16 +2769,17 @@ class XQueryLexer(ExtendedRegexLexer):
|
|
2767
2769
|
(r'(catch)(\s*)(\()(\$)',
|
2768
2770
|
bygroups(Keyword, Text, Punctuation, Name.Variable), 'varname'),
|
2769
2771
|
|
2770
|
-
(r'(@'
|
2771
|
-
(r'(
|
2772
|
-
(r'
|
2772
|
+
(r'(@'+qname+')', Name.Attribute),
|
2773
|
+
(r'(@'+ncname+')', Name.Attribute),
|
2774
|
+
(r'@\*:'+ncname, Name.Attribute),
|
2775
|
+
(r'(@)', Name.Attribute),
|
2773
2776
|
|
2774
2777
|
(r'//|/|\+|-|;|,|\(|\)', Punctuation),
|
2775
2778
|
|
2776
2779
|
# STANDALONE QNAMES
|
2777
|
-
(qname + r'(?=\s*{)', Name.
|
2778
|
-
(qname + r'(?=\s*\()', Name.Function, 'qname_braren'),
|
2779
|
-
(qname, Name.
|
2780
|
+
(qname + r'(?=\s*{)', Name.Tag, 'qname_braren'),
|
2781
|
+
(qname + r'(?=\s*\([^:])', Name.Function, 'qname_braren'),
|
2782
|
+
(qname, Name.Tag, 'operator'),
|
2780
2783
|
]
|
2781
2784
|
}
|
2782
2785
|
|
@@ -0,0 +1,209 @@
|
|
1
|
+
{-# LANGUAGE DeriveDataTypeable, FlexibleContexts, GeneralizedNewtypeDeriving
|
2
|
+
, MultiParamTypeClasses, OverloadedStrings, ScopedTypeVariables, TemplateHaskell
|
3
|
+
, TypeFamilies, FlexibleInstances #-}
|
4
|
+
module Main where
|
5
|
+
import Control.Applicative (Applicative, Alternative, (<$>))
|
6
|
+
import Control.Exception.Lifted (bracket)
|
7
|
+
import Control.Monad.Trans.Control (MonadBaseControl)
|
8
|
+
import Control.Monad (MonadPlus, mplus)
|
9
|
+
import Control.Monad.Reader (MonadReader, ReaderT(..), ask)
|
10
|
+
import Control.Monad.Trans (MonadIO(..))
|
11
|
+
import Data.Acid ( AcidState(..), EventState(..), EventResult(..)
|
12
|
+
, Query(..), QueryEvent(..), Update(..), UpdateEvent(..)
|
13
|
+
, IsAcidic(..), makeAcidic, openLocalState
|
14
|
+
)
|
15
|
+
import Data.Acid.Local ( createCheckpointAndClose
|
16
|
+
, openLocalStateFrom
|
17
|
+
)
|
18
|
+
import Data.Acid.Advanced (query', update')
|
19
|
+
import Data.Maybe (fromMaybe)
|
20
|
+
import Data.SafeCopy (SafeCopy, base, deriveSafeCopy)
|
21
|
+
import Data.Data (Data, Typeable)
|
22
|
+
import Data.Lens ((%=), (!=))
|
23
|
+
import Data.Lens.Template (makeLens)
|
24
|
+
import Data.Text.Lazy (Text)
|
25
|
+
import Happstack.Server ( Happstack, HasRqData, Method(GET, POST), Request(rqMethod)
|
26
|
+
, Response
|
27
|
+
, ServerPartT(..), WebMonad, FilterMonad, ServerMonad
|
28
|
+
, askRq, decodeBody, dir, defaultBodyPolicy, lookText
|
29
|
+
, mapServerPartT, nullConf, nullDir, ok, simpleHTTP
|
30
|
+
, toResponse
|
31
|
+
)
|
32
|
+
import Prelude hiding (head, id)
|
33
|
+
import System.FilePath ((</>))
|
34
|
+
import Text.Blaze ((!))
|
35
|
+
import Text.Blaze.Html4.Strict (body, head, html, input, form, label, p, title, toHtml)
|
36
|
+
import Text.Blaze.Html4.Strict.Attributes (action, enctype, for, id, method, name, type_, value)
|
37
|
+
class HasAcidState m st where
|
38
|
+
getAcidState :: m (AcidState st)
|
39
|
+
query :: forall event m.
|
40
|
+
( Functor m
|
41
|
+
, MonadIO m
|
42
|
+
, QueryEvent event
|
43
|
+
, HasAcidState m (EventState event)
|
44
|
+
) =>
|
45
|
+
event
|
46
|
+
-> m (EventResult event)
|
47
|
+
query event =
|
48
|
+
do as <- getAcidState
|
49
|
+
query' (as :: AcidState (EventState event)) event
|
50
|
+
update :: forall event m.
|
51
|
+
( Functor m
|
52
|
+
, MonadIO m
|
53
|
+
, UpdateEvent event
|
54
|
+
, HasAcidState m (EventState event)
|
55
|
+
) =>
|
56
|
+
event
|
57
|
+
-> m (EventResult event)
|
58
|
+
update event =
|
59
|
+
do as <- getAcidState
|
60
|
+
update' (as :: AcidState (EventState event)) event
|
61
|
+
-- | bracket the opening and close of the `AcidState` handle.
|
62
|
+
|
63
|
+
-- automatically creates a checkpoint on close
|
64
|
+
withLocalState :: (MonadBaseControl IO m, MonadIO m, IsAcidic st, Typeable st) =>
|
65
|
+
Maybe FilePath -- ^ path to state directory
|
66
|
+
-> st -- ^ initial state value
|
67
|
+
-> (AcidState st -> m a) -- ^ function which uses the `AcidState` handle
|
68
|
+
-> m a
|
69
|
+
withLocalState mPath initialState =
|
70
|
+
bracket (liftIO $ (maybe openLocalState openLocalStateFrom mPath) initialState)
|
71
|
+
(liftIO . createCheckpointAndClose)
|
72
|
+
-- State that stores a hit count
|
73
|
+
|
74
|
+
data CountState = CountState { _count :: Integer }
|
75
|
+
deriving (Eq, Ord, Data, Typeable, Show)
|
76
|
+
|
77
|
+
$(deriveSafeCopy 0 'base ''CountState)
|
78
|
+
$(makeLens ''CountState)
|
79
|
+
|
80
|
+
initialCountState :: CountState
|
81
|
+
initialCountState = CountState { _count = 0 }
|
82
|
+
|
83
|
+
incCount :: Update CountState Integer
|
84
|
+
incCount = count %= succ
|
85
|
+
|
86
|
+
$(makeAcidic ''CountState ['incCount])
|
87
|
+
-- State that stores a greeting
|
88
|
+
data GreetingState = GreetingState { _greeting :: Text }
|
89
|
+
deriving (Eq, Ord, Data, Typeable, Show)
|
90
|
+
|
91
|
+
$(deriveSafeCopy 0 'base ''GreetingState)
|
92
|
+
$(makeLens ''GreetingState)
|
93
|
+
|
94
|
+
initialGreetingState :: GreetingState
|
95
|
+
initialGreetingState = GreetingState { _greeting = "Hello" }
|
96
|
+
|
97
|
+
getGreeting :: Query GreetingState Text
|
98
|
+
getGreeting = _greeting <$> ask
|
99
|
+
|
100
|
+
setGreeting :: Text -> Update GreetingState Text
|
101
|
+
setGreeting txt = greeting != txt
|
102
|
+
|
103
|
+
$(makeAcidic ''GreetingState ['getGreeting, 'setGreeting])
|
104
|
+
data Acid = Acid { acidCountState :: AcidState CountState
|
105
|
+
, acidGreetingState :: AcidState GreetingState
|
106
|
+
}
|
107
|
+
|
108
|
+
withAcid :: Maybe FilePath -> (Acid -> IO a) -> IO a
|
109
|
+
withAcid mBasePath action =
|
110
|
+
let basePath = fromMaybe "_state" mBasePath
|
111
|
+
in withLocalState (Just $ basePath </> "count") initialCountState $ \c ->
|
112
|
+
withLocalState (Just $ basePath </> "greeting") initialGreetingState $ \g ->
|
113
|
+
action (Acid c g)
|
114
|
+
newtype App a = App { unApp :: ServerPartT (ReaderT Acid IO) a }
|
115
|
+
deriving ( Functor, Alternative, Applicative, Monad, MonadPlus, MonadIO
|
116
|
+
, HasRqData, ServerMonad ,WebMonad Response, FilterMonad Response
|
117
|
+
, Happstack, MonadReader Acid)
|
118
|
+
|
119
|
+
runApp :: Acid -> App a -> ServerPartT IO a
|
120
|
+
runApp acid (App sp) = mapServerPartT (flip runReaderT acid) sp
|
121
|
+
instance HasAcidState App CountState where
|
122
|
+
getAcidState = acidCountState <$> ask
|
123
|
+
|
124
|
+
instance HasAcidState App GreetingState where
|
125
|
+
getAcidState = acidGreetingState <$> ask
|
126
|
+
page :: App Response
|
127
|
+
page =
|
128
|
+
do nullDir
|
129
|
+
g <- greet
|
130
|
+
c <- update IncCount -- ^ a CountState event
|
131
|
+
ok $ toResponse $
|
132
|
+
html $ do
|
133
|
+
head $ do
|
134
|
+
title "acid-state demo"
|
135
|
+
body $ do
|
136
|
+
form ! action "/" ! method "POST" ! enctype "multipart/form-data" $ do
|
137
|
+
label "new message: " ! for "msg"
|
138
|
+
input ! type_ "text" ! id "msg" ! name "greeting"
|
139
|
+
input ! type_ "submit" ! value "update message"
|
140
|
+
p $ toHtml g
|
141
|
+
p $ do "This page has been loaded "
|
142
|
+
toHtml c
|
143
|
+
" time(s)."
|
144
|
+
where
|
145
|
+
greet =
|
146
|
+
do m <- rqMethod <$> askRq
|
147
|
+
case m of
|
148
|
+
POST ->
|
149
|
+
do decodeBody (defaultBodyPolicy "/tmp/" 0 1000 1000)
|
150
|
+
newGreeting <- lookText "greeting"
|
151
|
+
update (SetGreeting newGreeting) -- ^ a GreetingState event
|
152
|
+
return newGreeting
|
153
|
+
GET ->
|
154
|
+
do query GetGreeting -- ^ a GreetingState event
|
155
|
+
main :: IO ()
|
156
|
+
main =
|
157
|
+
withAcid Nothing $ \acid ->
|
158
|
+
simpleHTTP nullConf $ runApp acid page
|
159
|
+
newtype FooState = FooState { foo :: Text }
|
160
|
+
deriving (Eq, Ord, Data, Typeable, SafeCopy)
|
161
|
+
|
162
|
+
initialFooState :: FooState
|
163
|
+
initialFooState = FooState { foo = "foo" }
|
164
|
+
|
165
|
+
askFoo :: Query FooState Text
|
166
|
+
askFoo = foo <$> ask
|
167
|
+
|
168
|
+
$(makeAcidic ''FooState ['askFoo])
|
169
|
+
fooPlugin :: (Happstack m, HasAcidState m FooState) => m Response
|
170
|
+
fooPlugin =
|
171
|
+
dir "foo" $ do
|
172
|
+
txt <- query AskFoo
|
173
|
+
ok $ toResponse txt
|
174
|
+
data Acid' = Acid' { acidCountState' :: AcidState CountState
|
175
|
+
, acidGreetingState' :: AcidState GreetingState
|
176
|
+
, acidFooState' :: AcidState FooState
|
177
|
+
}
|
178
|
+
withAcid' :: Maybe FilePath -> (Acid' -> IO a) -> IO a
|
179
|
+
withAcid' mBasePath action =
|
180
|
+
let basePath = fromMaybe "_state" mBasePath
|
181
|
+
in withLocalState (Just $ basePath </> "count") initialCountState $ \c ->
|
182
|
+
withLocalState (Just $ basePath </> "greeting") initialGreetingState $ \g ->
|
183
|
+
withLocalState (Just $ basePath </> "foo") initialFooState $ \f ->
|
184
|
+
action (Acid' c g f)
|
185
|
+
newtype App' a = App' { unApp' :: ServerPartT (ReaderT Acid' IO) a }
|
186
|
+
deriving ( Functor, Alternative, Applicative, Monad, MonadPlus, MonadIO
|
187
|
+
, HasRqData, ServerMonad ,WebMonad Response, FilterMonad Response
|
188
|
+
, Happstack, MonadReader Acid')
|
189
|
+
|
190
|
+
instance HasAcidState App' FooState where
|
191
|
+
getAcidState = acidFooState' <$> ask
|
192
|
+
fooAppPlugin :: App' Response
|
193
|
+
fooAppPlugin = fooPlugin
|
194
|
+
fooReaderPlugin :: ReaderT (AcidState FooState) (ServerPartT IO) Response
|
195
|
+
fooReaderPlugin = fooPlugin
|
196
|
+
instance HasAcidState (ReaderT (AcidState FooState) (ServerPartT IO)) FooState where
|
197
|
+
getAcidState = ask
|
198
|
+
withFooPlugin :: (MonadIO m, MonadBaseControl IO m) =>
|
199
|
+
FilePath -- ^ path to state directory
|
200
|
+
-> (ServerPartT IO Response -> m a) -- ^ function that uses fooPlugin
|
201
|
+
-> m a
|
202
|
+
withFooPlugin basePath f =
|
203
|
+
do withLocalState (Just $ basePath </> "foo") initialFooState $ \fooState ->
|
204
|
+
f $ runReaderT fooReaderPlugin fooState
|
205
|
+
main' :: IO ()
|
206
|
+
main' =
|
207
|
+
withFooPlugin "_state" $ \fooPlugin' ->
|
208
|
+
withAcid Nothing $ \acid ->
|
209
|
+
simpleHTTP nullConf $ fooPlugin' `mplus` runApp acid page
|
@@ -0,0 +1,1031 @@
|
|
1
|
+
## core string functions ##
|
2
|
+
|
3
|
+
length(s::String) = error("you must implement length(",typeof(s),")")
|
4
|
+
next(s::String, i::Int) = error("you must implement next(",typeof(s),",Int)")
|
5
|
+
next(s::DirectIndexString, i::Int) = (s[i],i+1)
|
6
|
+
next(s::String, i::Integer) = next(s,int(i))
|
7
|
+
|
8
|
+
## generic supplied functions ##
|
9
|
+
|
10
|
+
start(s::String) = 1
|
11
|
+
done(s::String,i) = (i > length(s))
|
12
|
+
isempty(s::String) = done(s,start(s))
|
13
|
+
ref(s::String, i::Int) = next(s,i)[1]
|
14
|
+
ref(s::String, i::Integer) = s[int(i)]
|
15
|
+
ref(s::String, x::Real) = s[iround(x)]
|
16
|
+
ref{T<:Integer}(s::String, r::Range1{T}) = s[int(first(r)):int(last(r))]
|
17
|
+
|
18
|
+
symbol(s::String) = symbol(cstring(s))
|
19
|
+
string(s::String) = s
|
20
|
+
|
21
|
+
print(s::String) = for c=s; print(c); end
|
22
|
+
print(x...) = for i=x; print(i); end
|
23
|
+
println(args...) = print(args..., '\n')
|
24
|
+
|
25
|
+
show(s::String) = print_quoted(s)
|
26
|
+
|
27
|
+
(*)(s::String...) = strcat(s...)
|
28
|
+
(^)(s::String, r::Integer) = repeat(s,r)
|
29
|
+
|
30
|
+
size(s::String) = (length(s),)
|
31
|
+
size(s::String, d::Integer) = d==1 ? length(s) :
|
32
|
+
error("in size: dimension ",d," out of range")
|
33
|
+
|
34
|
+
strlen(s::DirectIndexString) = length(s)
|
35
|
+
function strlen(s::String)
|
36
|
+
i = start(s)
|
37
|
+
if done(s,i)
|
38
|
+
return 0
|
39
|
+
end
|
40
|
+
n = 1
|
41
|
+
while true
|
42
|
+
c, j = next(s,i)
|
43
|
+
if done(s,j)
|
44
|
+
return n
|
45
|
+
end
|
46
|
+
n += 1
|
47
|
+
i = j
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
isvalid(s::DirectIndexString, i::Integer) = (start(s) <= i <= length(s))
|
52
|
+
function isvalid(s::String, i::Integer)
|
53
|
+
try
|
54
|
+
next(s,i)
|
55
|
+
true
|
56
|
+
catch
|
57
|
+
false
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
prevind(s::DirectIndexString, i::Integer) = i-1
|
62
|
+
thisind(s::DirectIndexString, i::Integer) = i
|
63
|
+
nextind(s::DirectIndexString, i::Integer) = i+1
|
64
|
+
|
65
|
+
prevind(s::String, i::Integer) = thisind(s,thisind(s,i)-1)
|
66
|
+
|
67
|
+
function thisind(s::String, i::Integer)
|
68
|
+
for j = i:-1:1
|
69
|
+
if isvalid(s,j)
|
70
|
+
return j
|
71
|
+
end
|
72
|
+
end
|
73
|
+
return 0 # out of range
|
74
|
+
end
|
75
|
+
|
76
|
+
function nextind(s::String, i::Integer)
|
77
|
+
for j = i+1:length(s)
|
78
|
+
if isvalid(s,j)
|
79
|
+
return j
|
80
|
+
end
|
81
|
+
end
|
82
|
+
length(s)+1 # out of range
|
83
|
+
end
|
84
|
+
|
85
|
+
ind2chr(s::DirectIndexString, i::Integer) = i
|
86
|
+
chr2ind(s::DirectIndexString, i::Integer) = i
|
87
|
+
|
88
|
+
function ind2chr(s::String, i::Integer)
|
89
|
+
s[i] # throws error if invalid
|
90
|
+
j = 1
|
91
|
+
k = start(s)
|
92
|
+
while true
|
93
|
+
c, l = next(s,k)
|
94
|
+
if i <= k
|
95
|
+
return j
|
96
|
+
end
|
97
|
+
j += 1
|
98
|
+
k = l
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
102
|
+
function chr2ind(s::String, i::Integer)
|
103
|
+
if i < 1
|
104
|
+
return i
|
105
|
+
end
|
106
|
+
j = 1
|
107
|
+
k = start(s)
|
108
|
+
while true
|
109
|
+
c, l = next(s,k)
|
110
|
+
if i == j
|
111
|
+
return k
|
112
|
+
end
|
113
|
+
j += 1
|
114
|
+
k = l
|
115
|
+
end
|
116
|
+
end
|
117
|
+
|
118
|
+
function strchr(s::String, c::Char, i::Integer)
|
119
|
+
i = nextind(s,i)
|
120
|
+
while !done(s,i)
|
121
|
+
d, j = next(s,i)
|
122
|
+
if c == d
|
123
|
+
return i
|
124
|
+
end
|
125
|
+
i = j
|
126
|
+
end
|
127
|
+
return 0
|
128
|
+
end
|
129
|
+
strchr(s::String, c::Char) = strchr(s, c, start(s))
|
130
|
+
contains(s::String, c::Char) = (strchr(s,c)!=0)
|
131
|
+
|
132
|
+
function chars(s::String)
|
133
|
+
cx = Array(Char,strlen(s))
|
134
|
+
i = 0
|
135
|
+
for c in s
|
136
|
+
cx[i += 1] = c
|
137
|
+
end
|
138
|
+
return cx
|
139
|
+
end
|
140
|
+
|
141
|
+
function cmp(a::String, b::String)
|
142
|
+
i = start(a)
|
143
|
+
j = start(b)
|
144
|
+
while !done(a,i) && !done(b,i)
|
145
|
+
c, i = next(a,i)
|
146
|
+
d, j = next(b,j)
|
147
|
+
if c != d
|
148
|
+
return c < d ? -1 : +1
|
149
|
+
end
|
150
|
+
end
|
151
|
+
done(a,i) && !done(b,j) ? -1 :
|
152
|
+
!done(a,i) && done(b,j) ? +1 : 0
|
153
|
+
end
|
154
|
+
|
155
|
+
isequal(a::String, b::String) = cmp(a,b) == 0
|
156
|
+
isless(a::String, b::String) = cmp(a,b) < 0
|
157
|
+
|
158
|
+
# faster comparisons for byte strings
|
159
|
+
|
160
|
+
cmp(a::ByteString, b::ByteString) = lexcmp(a.data, b.data)
|
161
|
+
isequal(a::ByteString, b::ByteString) = length(a)==length(b) && cmp(a,b)==0
|
162
|
+
|
163
|
+
## character column width function ##
|
164
|
+
|
165
|
+
charwidth(c::Char) = max(0,int(ccall(:wcwidth, Int32, (Char,), c)))
|
166
|
+
strwidth(s::String) = (w=0; for c in s; w += charwidth(c); end; w)
|
167
|
+
strwidth(s::ByteString) = ccall(:u8_strwidth, Int, (Ptr{Uint8},), s.data)
|
168
|
+
# TODO: implement and use u8_strnwidth that takes a length argument
|
169
|
+
|
170
|
+
## generic string uses only length and next ##
|
171
|
+
|
172
|
+
type GenericString <: String
|
173
|
+
string::String
|
174
|
+
end
|
175
|
+
|
176
|
+
length(s::GenericString) = length(s.string)
|
177
|
+
next(s::GenericString, i::Int) = next(s.string, i)
|
178
|
+
|
179
|
+
## plain old character arrays ##
|
180
|
+
|
181
|
+
type CharString <: String
|
182
|
+
chars::Array{Char,1}
|
183
|
+
|
184
|
+
CharString(a::Array{Char,1}) = new(a)
|
185
|
+
CharString(c::Char...) = new([ c[i] | i=1:length(c) ])
|
186
|
+
end
|
187
|
+
CharString(x...) = CharString(map(char,x)...)
|
188
|
+
|
189
|
+
next(s::CharString, i::Int) = (s.chars[i], i+1)
|
190
|
+
length(s::CharString) = length(s.chars)
|
191
|
+
strlen(s::CharString) = length(s)
|
192
|
+
|
193
|
+
string(c::Char) = CharString(c)
|
194
|
+
string(c::Char, x::Char...) = CharString(c, x...)
|
195
|
+
|
196
|
+
## substrings reference original strings ##
|
197
|
+
|
198
|
+
type SubString <: String
|
199
|
+
string::String
|
200
|
+
offset::Int
|
201
|
+
length::Int
|
202
|
+
|
203
|
+
SubString(s::String, i::Int, j::Int) = new(s, i-1, j-i+1)
|
204
|
+
SubString(s::SubString, i::Int, j::Int) =
|
205
|
+
new(s.string, i-1+s.offset, j-i+1)
|
206
|
+
end
|
207
|
+
SubString(s::String, i::Integer, j::Integer) = SubString(s, int(i), int(j))
|
208
|
+
|
209
|
+
function next(s::SubString, i::Int)
|
210
|
+
if i < 1 || i > s.length
|
211
|
+
error("string index out of bounds")
|
212
|
+
end
|
213
|
+
c, i = next(s.string, i+s.offset)
|
214
|
+
c, i-s.offset
|
215
|
+
end
|
216
|
+
|
217
|
+
length(s::SubString) = s.length
|
218
|
+
# TODO: strlen(s::SubString) = ??
|
219
|
+
# default implementation will work but it's slow
|
220
|
+
# can this be delegated efficiently somehow?
|
221
|
+
# that may require additional string interfaces
|
222
|
+
|
223
|
+
function ref(s::String, r::Range1{Int})
|
224
|
+
if first(r) < 1 || length(s) < last(r)
|
225
|
+
error("in substring slice: index out of range")
|
226
|
+
end
|
227
|
+
SubString(s, first(r), last(r))
|
228
|
+
end
|
229
|
+
|
230
|
+
## efficient representation of repeated strings ##
|
231
|
+
|
232
|
+
type RepString <: String
|
233
|
+
string::String
|
234
|
+
repeat::Integer
|
235
|
+
end
|
236
|
+
|
237
|
+
length(s::RepString) = length(s.string)*s.repeat
|
238
|
+
strlen(s::RepString) = strlen(s.string)*s.repeat
|
239
|
+
|
240
|
+
function next(s::RepString, i::Int)
|
241
|
+
if i < 1 || i > length(s)
|
242
|
+
error("string index out of bounds")
|
243
|
+
end
|
244
|
+
j = mod1(i,length(s.string))
|
245
|
+
c, k = next(s.string, j)
|
246
|
+
c, k-j+i
|
247
|
+
end
|
248
|
+
|
249
|
+
function repeat(s::String, r::Integer)
|
250
|
+
r < 0 ? error("can't repeat a string ",r," times") :
|
251
|
+
r == 0 ? "" :
|
252
|
+
r == 1 ? s :
|
253
|
+
RepString(s,r)
|
254
|
+
end
|
255
|
+
|
256
|
+
## reversed strings without data movement ##
|
257
|
+
|
258
|
+
type RevString <: String
|
259
|
+
string::String
|
260
|
+
end
|
261
|
+
|
262
|
+
length(s::RevString) = length(s.string)
|
263
|
+
strlen(s::RevString) = strlen(s.string)
|
264
|
+
|
265
|
+
start(s::RevString) = (n=length(s); n-thisind(s.string,n)+1)
|
266
|
+
function next(s::RevString, i::Int)
|
267
|
+
n = length(s); j = n-i+1
|
268
|
+
(s.string[j], n-thisind(s.string,j-1)+1)
|
269
|
+
end
|
270
|
+
|
271
|
+
reverse(s::String) = RevString(s)
|
272
|
+
reverse(s::RevString) = s.string
|
273
|
+
|
274
|
+
## ropes for efficient concatenation, etc. ##
|
275
|
+
|
276
|
+
# Idea: instead of this standard binary tree structure,
|
277
|
+
# how about we keep an array of substrings, with an
|
278
|
+
# offset array. We can do binary search on the offset
|
279
|
+
# array so we get O(log(n)) indexing time still, but we
|
280
|
+
# can compute the offsets lazily and avoid all the
|
281
|
+
# futzing around while the string is being constructed.
|
282
|
+
|
283
|
+
type RopeString <: String
|
284
|
+
head::String
|
285
|
+
tail::String
|
286
|
+
depth::Int32
|
287
|
+
length::Int
|
288
|
+
|
289
|
+
RopeString(h::RopeString, t::RopeString) =
|
290
|
+
depth(h.tail) + depth(t) < depth(h.head) ?
|
291
|
+
RopeString(h.head, RopeString(h.tail, t)) :
|
292
|
+
new(h, t, max(h.depth,t.depth)+1, length(h)+length(t))
|
293
|
+
|
294
|
+
RopeString(h::RopeString, t::String) =
|
295
|
+
depth(h.tail) < depth(h.head) ?
|
296
|
+
RopeString(h.head, RopeString(h.tail, t)) :
|
297
|
+
new(h, t, h.depth+1, length(h)+length(t))
|
298
|
+
|
299
|
+
RopeString(h::String, t::RopeString) =
|
300
|
+
depth(t.head) < depth(t.tail) ?
|
301
|
+
RopeString(RopeString(h, t.head), t.tail) :
|
302
|
+
new(h, t, t.depth+1, length(h)+length(t))
|
303
|
+
|
304
|
+
RopeString(h::String, t::String) =
|
305
|
+
new(h, t, 1, length(h)+length(t))
|
306
|
+
end
|
307
|
+
|
308
|
+
depth(s::String) = 0
|
309
|
+
depth(s::RopeString) = s.depth
|
310
|
+
|
311
|
+
function next(s::RopeString, i::Int)
|
312
|
+
if i <= length(s.head)
|
313
|
+
return next(s.head, i)
|
314
|
+
else
|
315
|
+
c, j = next(s.tail, i-length(s.head))
|
316
|
+
return c, j+length(s.head)
|
317
|
+
end
|
318
|
+
end
|
319
|
+
|
320
|
+
length(s::RopeString) = s.length
|
321
|
+
strlen(s::RopeString) = strlen(s.head) + strlen(s.tail)
|
322
|
+
|
323
|
+
strcat() = ""
|
324
|
+
strcat(s::String) = s
|
325
|
+
strcat(x...) = strcat(map(string,x)...)
|
326
|
+
strcat(s::String, t::String...) =
|
327
|
+
(t = strcat(t...); isempty(s) ? t : isempty(t) ? s : RopeString(s, t))
|
328
|
+
|
329
|
+
print(s::RopeString) = print(s.head, s.tail)
|
330
|
+
|
331
|
+
## transformed strings ##
|
332
|
+
|
333
|
+
type TransformedString <: String
|
334
|
+
transform::Function
|
335
|
+
string::String
|
336
|
+
end
|
337
|
+
|
338
|
+
length(s::TransformedString) = length(s.string)
|
339
|
+
strlen(s::TransformedString) = strlen(s.string)
|
340
|
+
|
341
|
+
function next(s::TransformedString, i::Int)
|
342
|
+
c, j = next(s.string,i)
|
343
|
+
c = s.transform(c, i)
|
344
|
+
return c, j
|
345
|
+
end
|
346
|
+
|
347
|
+
## uppercase and lowercase transformations ##
|
348
|
+
|
349
|
+
uppercase(c::Char) = ccall(:towupper, Char, (Char,), c)
|
350
|
+
lowercase(c::Char) = ccall(:towlower, Char, (Char,), c)
|
351
|
+
|
352
|
+
uppercase(s::String) = TransformedString((c,i)->uppercase(c), s)
|
353
|
+
lowercase(s::String) = TransformedString((c,i)->lowercase(c), s)
|
354
|
+
|
355
|
+
ucfirst(s::String) = TransformedString((c,i)->i==1 ? uppercase(c) : c, s)
|
356
|
+
lcfirst(s::String) = TransformedString((c,i)->i==1 ? lowercase(c) : c, s)
|
357
|
+
|
358
|
+
const uc = uppercase
|
359
|
+
const lc = lowercase
|
360
|
+
|
361
|
+
## string map ##
|
362
|
+
|
363
|
+
function map(f::Function, s::String)
|
364
|
+
out = memio(length(s))
|
365
|
+
for c in s
|
366
|
+
write(out, f(c)::Char)
|
367
|
+
end
|
368
|
+
takebuf_string(out)
|
369
|
+
end
|
370
|
+
|
371
|
+
## conversion of general objects to strings ##
|
372
|
+
|
373
|
+
string(x) = print_to_string(show, x)
|
374
|
+
cstring(x...) = print_to_string(print, x...)
|
375
|
+
|
376
|
+
function cstring(p::Ptr{Uint8})
|
377
|
+
p == C_NULL ? error("cannot convert NULL to string") :
|
378
|
+
ccall(:jl_cstr_to_string, Any, (Ptr{Uint8},), p)::ByteString
|
379
|
+
end
|
380
|
+
|
381
|
+
## string promotion rules ##
|
382
|
+
|
383
|
+
promote_rule(::Type{UTF8String} , ::Type{ASCIIString}) = UTF8String
|
384
|
+
promote_rule(::Type{UTF8String} , ::Type{CharString} ) = UTF8String
|
385
|
+
promote_rule(::Type{ASCIIString}, ::Type{CharString} ) = UTF8String
|
386
|
+
|
387
|
+
## printing literal quoted string data ##
|
388
|
+
|
389
|
+
# TODO: this is really the inverse of print_unbackslashed
|
390
|
+
|
391
|
+
function print_quoted_literal(s::String)
|
392
|
+
print('"')
|
393
|
+
for c = s; c == '"' ? print("\\\"") : print(c); end
|
394
|
+
print('"')
|
395
|
+
end
|
396
|
+
|
397
|
+
## string escaping & unescaping ##
|
398
|
+
|
399
|
+
escape_nul(s::String, i::Int) =
|
400
|
+
!done(s,i) && '0' <= next(s,i)[1] <= '7' ? L"\x00" : L"\0"
|
401
|
+
|
402
|
+
is_hex_digit(c::Char) = '0'<=c<='9' || 'a'<=c<='f' || 'A'<=c<='F'
|
403
|
+
need_full_hex(s::String, i::Int) = !done(s,i) && is_hex_digit(next(s,i)[1])
|
404
|
+
|
405
|
+
function print_escaped(s::String, esc::String)
|
406
|
+
i = start(s)
|
407
|
+
while !done(s,i)
|
408
|
+
c, j = next(s,i)
|
409
|
+
c == '\0' ? print(escape_nul(s,j)) :
|
410
|
+
c == '\e' ? print(L"\e") :
|
411
|
+
c == '\\' ? print("\\\\") :
|
412
|
+
contains(esc,c) ? print('\\', c) :
|
413
|
+
iswprint(c) ? print(c) :
|
414
|
+
7 <= c <= 13 ? print('\\', "abtnvfr"[c-6]) :
|
415
|
+
c <= '\x7f' ? print(L"\x", hex(c, 2)) :
|
416
|
+
c <= '\uffff' ? print(L"\u", hex(c, need_full_hex(s,j) ? 4 : 2)) :
|
417
|
+
print(L"\U", hex(c, need_full_hex(s,j) ? 8 : 4))
|
418
|
+
i = j
|
419
|
+
end
|
420
|
+
end
|
421
|
+
|
422
|
+
escape_string(s::String) = print_to_string(length(s), print_escaped, s, "\"")
|
423
|
+
print_quoted(s::String) = (print('"'); print_escaped(s, "\"\$"); print('"'))
|
424
|
+
#" # work around syntax highlighting problem
|
425
|
+
quote_string(s::String) = print_to_string(length(s)+2, print_quoted, s)
|
426
|
+
|
427
|
+
# bare minimum unescaping function unescapes only given characters
|
428
|
+
|
429
|
+
function print_unescaped_chars(s::String, esc::String)
|
430
|
+
if !contains(esc,'\\')
|
431
|
+
esc = strcat("\\", esc)
|
432
|
+
end
|
433
|
+
i = start(s)
|
434
|
+
while !done(s,i)
|
435
|
+
c, i = next(s,i)
|
436
|
+
if c == '\\' && !done(s,i) && contains(esc,s[i])
|
437
|
+
c, i = next(s,i)
|
438
|
+
end
|
439
|
+
print(c)
|
440
|
+
end
|
441
|
+
end
|
442
|
+
|
443
|
+
unescape_chars(s::String, esc::String) =
|
444
|
+
print_to_string(length(s), print_unescaped_chars, s, esc)
|
445
|
+
|
446
|
+
# general unescaping of traditional C and Unicode escape sequences
|
447
|
+
|
448
|
+
function print_unescaped(s::String)
|
449
|
+
i = start(s)
|
450
|
+
while !done(s,i)
|
451
|
+
c, i = next(s,i)
|
452
|
+
if !done(s,i) && c == '\\'
|
453
|
+
c, i = next(s,i)
|
454
|
+
if c == 'x' || c == 'u' || c == 'U'
|
455
|
+
n = k = 0
|
456
|
+
m = c == 'x' ? 2 :
|
457
|
+
c == 'u' ? 4 : 8
|
458
|
+
while (k+=1) <= m && !done(s,i)
|
459
|
+
c, j = next(s,i)
|
460
|
+
n = '0' <= c <= '9' ? n<<4 + c-'0' :
|
461
|
+
'a' <= c <= 'f' ? n<<4 + c-'a'+10 :
|
462
|
+
'A' <= c <= 'F' ? n<<4 + c-'A'+10 : break
|
463
|
+
i = j
|
464
|
+
end
|
465
|
+
if k == 1
|
466
|
+
error("\\x used with no following hex digits")
|
467
|
+
end
|
468
|
+
if m == 2 # \x escape sequence
|
469
|
+
write(uint8(n))
|
470
|
+
else
|
471
|
+
print(char(n))
|
472
|
+
end
|
473
|
+
elseif '0' <= c <= '7'
|
474
|
+
k = 1
|
475
|
+
n = c-'0'
|
476
|
+
while (k+=1) <= 3 && !done(s,i)
|
477
|
+
c, j = next(s,i)
|
478
|
+
n = '0' <= c <= '7' ? n<<3 + c-'0' : break
|
479
|
+
i = j
|
480
|
+
end
|
481
|
+
if n > 255
|
482
|
+
error("octal escape sequence out of range")
|
483
|
+
end
|
484
|
+
write(uint8(n))
|
485
|
+
else
|
486
|
+
print(c == 'a' ? '\a' :
|
487
|
+
c == 'b' ? '\b' :
|
488
|
+
c == 't' ? '\t' :
|
489
|
+
c == 'n' ? '\n' :
|
490
|
+
c == 'v' ? '\v' :
|
491
|
+
c == 'f' ? '\f' :
|
492
|
+
c == 'r' ? '\r' :
|
493
|
+
c == 'e' ? '\e' : c)
|
494
|
+
end
|
495
|
+
else
|
496
|
+
print(c)
|
497
|
+
end
|
498
|
+
end
|
499
|
+
end
|
500
|
+
|
501
|
+
unescape_string(s::String) = print_to_string(length(s), print_unescaped, s)
|
502
|
+
|
503
|
+
## checking UTF-8 & ASCII validity ##
|
504
|
+
|
505
|
+
byte_string_classify(s::ByteString) =
|
506
|
+
ccall(:u8_isvalid, Int32, (Ptr{Uint8}, Int), s.data, length(s))
|
507
|
+
# 0: neither valid ASCII nor UTF-8
|
508
|
+
# 1: valid ASCII
|
509
|
+
# 2: valid UTF-8
|
510
|
+
|
511
|
+
is_valid_ascii(s::ByteString) = byte_string_classify(s) == 1
|
512
|
+
is_valid_utf8 (s::ByteString) = byte_string_classify(s) != 0
|
513
|
+
|
514
|
+
check_ascii(s::ByteString) = is_valid_ascii(s) ? s : error("invalid ASCII sequence")
|
515
|
+
check_utf8 (s::ByteString) = is_valid_utf8(s) ? s : error("invalid UTF-8 sequence")
|
516
|
+
|
517
|
+
## string interpolation parsing ##
|
518
|
+
|
519
|
+
function _jl_interp_parse(s::String, unescape::Function, printer::Function)
|
520
|
+
sx = {}
|
521
|
+
i = j = start(s)
|
522
|
+
while !done(s,j)
|
523
|
+
c, k = next(s,j)
|
524
|
+
if c == '$'
|
525
|
+
if !isempty(s[i:j-1])
|
526
|
+
push(sx, unescape(s[i:j-1]))
|
527
|
+
end
|
528
|
+
ex, j = parseatom(s,k)
|
529
|
+
push(sx, ex)
|
530
|
+
i = j
|
531
|
+
elseif c == '\\' && !done(s,k)
|
532
|
+
if s[k] == '$'
|
533
|
+
if !isempty(s[i:j-1])
|
534
|
+
push(sx, unescape(s[i:j-1]))
|
535
|
+
end
|
536
|
+
i = k
|
537
|
+
end
|
538
|
+
c, j = next(s,k)
|
539
|
+
else
|
540
|
+
j = k
|
541
|
+
end
|
542
|
+
end
|
543
|
+
if !isempty(s[i:])
|
544
|
+
push(sx, unescape(s[i:j-1]))
|
545
|
+
end
|
546
|
+
length(sx) == 1 && isa(sx[1],ByteString) ? sx[1] :
|
547
|
+
expr(:call, :print_to_string, printer, sx...)
|
548
|
+
end
|
549
|
+
|
550
|
+
_jl_interp_parse(s::String, u::Function) = _jl_interp_parse(s, u, print)
|
551
|
+
_jl_interp_parse(s::String) = _jl_interp_parse(s, x->check_utf8(unescape_string(x)))
|
552
|
+
|
553
|
+
function _jl_interp_parse_bytes(s::String)
|
554
|
+
writer(x...) = for w=x; write(w); end
|
555
|
+
_jl_interp_parse(s, unescape_string, writer)
|
556
|
+
end
|
557
|
+
|
558
|
+
## core string macros ##
|
559
|
+
|
560
|
+
macro str(s); _jl_interp_parse(s); end
|
561
|
+
macro S_str(s); _jl_interp_parse(s); end
|
562
|
+
macro I_str(s); _jl_interp_parse(s, x->unescape_chars(x,"\"")); end
|
563
|
+
macro E_str(s); check_utf8(unescape_string(s)); end
|
564
|
+
macro B_str(s); _jl_interp_parse_bytes(s); end
|
565
|
+
macro b_str(s); ex = _jl_interp_parse_bytes(s); :(($ex).data); end
|
566
|
+
|
567
|
+
## shell-like command parsing ##
|
568
|
+
|
569
|
+
function _jl_shell_parse(s::String, interp::Bool)
|
570
|
+
|
571
|
+
in_single_quotes = false
|
572
|
+
in_double_quotes = false
|
573
|
+
|
574
|
+
args = {}
|
575
|
+
arg = {}
|
576
|
+
i = start(s)
|
577
|
+
j = i
|
578
|
+
|
579
|
+
function update_arg(x)
|
580
|
+
if !isa(x,String) || !isempty(x)
|
581
|
+
push(arg, x)
|
582
|
+
end
|
583
|
+
end
|
584
|
+
function append_arg()
|
585
|
+
if isempty(arg); arg = {"",}; end
|
586
|
+
push(args, arg)
|
587
|
+
arg = {}
|
588
|
+
end
|
589
|
+
|
590
|
+
while !done(s,j)
|
591
|
+
c, k = next(s,j)
|
592
|
+
if !in_single_quotes && !in_double_quotes && iswspace(c)
|
593
|
+
update_arg(s[i:j-1])
|
594
|
+
append_arg()
|
595
|
+
j = k
|
596
|
+
while !done(s,j)
|
597
|
+
c, k = next(s,j)
|
598
|
+
if !iswspace(c)
|
599
|
+
i = j
|
600
|
+
break
|
601
|
+
end
|
602
|
+
j = k
|
603
|
+
end
|
604
|
+
elseif interp && !in_single_quotes && c == '$'
|
605
|
+
update_arg(s[i:j-1]); i = k; j = k
|
606
|
+
if done(s,k)
|
607
|
+
error("\$ right before end of command")
|
608
|
+
end
|
609
|
+
if iswspace(s[k])
|
610
|
+
error("space not allowed right after \$")
|
611
|
+
end
|
612
|
+
ex, j = parseatom(s,j)
|
613
|
+
update_arg(ex); i = j
|
614
|
+
else
|
615
|
+
if !in_double_quotes && c == '\''
|
616
|
+
in_single_quotes = !in_single_quotes
|
617
|
+
update_arg(s[i:j-1]); i = k
|
618
|
+
elseif !in_single_quotes && c == '"'
|
619
|
+
in_double_quotes = !in_double_quotes
|
620
|
+
update_arg(s[i:j-1]); i = k
|
621
|
+
elseif c == '\\'
|
622
|
+
if in_double_quotes
|
623
|
+
if done(s,k)
|
624
|
+
error("unterminated double quote")
|
625
|
+
end
|
626
|
+
if s[k] == '"' || s[k] == '$'
|
627
|
+
update_arg(s[i:j-1]); i = k
|
628
|
+
c, k = next(s,k)
|
629
|
+
end
|
630
|
+
elseif !in_single_quotes
|
631
|
+
if done(s,k)
|
632
|
+
error("dangling backslash")
|
633
|
+
end
|
634
|
+
update_arg(s[i:j-1]); i = k
|
635
|
+
c, k = next(s,k)
|
636
|
+
end
|
637
|
+
end
|
638
|
+
j = k
|
639
|
+
end
|
640
|
+
end
|
641
|
+
|
642
|
+
if in_single_quotes; error("unterminated single quote"); end
|
643
|
+
if in_double_quotes; error("unterminated double quote"); end
|
644
|
+
|
645
|
+
update_arg(s[i:])
|
646
|
+
append_arg()
|
647
|
+
|
648
|
+
if !interp
|
649
|
+
return args
|
650
|
+
end
|
651
|
+
|
652
|
+
# construct an expression
|
653
|
+
exprs = {}
|
654
|
+
for arg in args
|
655
|
+
push(exprs, expr(:tuple, arg))
|
656
|
+
end
|
657
|
+
expr(:tuple,exprs)
|
658
|
+
end
|
659
|
+
_jl_shell_parse(s::String) = _jl_shell_parse(s,true)
|
660
|
+
|
661
|
+
function shell_split(s::String)
|
662
|
+
parsed = _jl_shell_parse(s,false)
|
663
|
+
args = String[]
|
664
|
+
for arg in parsed
|
665
|
+
push(args, strcat(arg...))
|
666
|
+
end
|
667
|
+
args
|
668
|
+
end
|
669
|
+
|
670
|
+
function print_shell_word(word::String)
|
671
|
+
if isempty(word)
|
672
|
+
print("''")
|
673
|
+
end
|
674
|
+
has_single = false
|
675
|
+
has_special = false
|
676
|
+
for c in word
|
677
|
+
if iswspace(c) || c=='\\' || c=='\'' || c=='"' || c=='$'
|
678
|
+
has_special = true
|
679
|
+
if c == '\''
|
680
|
+
has_single = true
|
681
|
+
end
|
682
|
+
end
|
683
|
+
end
|
684
|
+
if !has_special
|
685
|
+
print(word)
|
686
|
+
elseif !has_single
|
687
|
+
print('\'', word, '\'')
|
688
|
+
else
|
689
|
+
print('"')
|
690
|
+
for c in word
|
691
|
+
if c == '"' || c == '$'
|
692
|
+
print('\\')
|
693
|
+
end
|
694
|
+
print(c)
|
695
|
+
end
|
696
|
+
print('"')
|
697
|
+
end
|
698
|
+
end
|
699
|
+
|
700
|
+
function print_shell_escaped(cmd::String, args::String...)
|
701
|
+
print_shell_word(cmd)
|
702
|
+
for arg in args
|
703
|
+
print(' ')
|
704
|
+
print_shell_word(arg)
|
705
|
+
end
|
706
|
+
end
|
707
|
+
|
708
|
+
shell_escape(cmd::String, args::String...) =
|
709
|
+
print_to_string(print_shell_escaped, cmd, args...)
|
710
|
+
|
711
|
+
## interface to parser ##
|
712
|
+
|
713
|
+
function parse(s::String, pos, greedy)
|
714
|
+
# returns (expr, end_pos). expr is () in case of parse error.
|
715
|
+
ex, pos = ccall(:jl_parse_string, Any,
|
716
|
+
(Ptr{Uint8}, Int32, Int32),
|
717
|
+
cstring(s), pos-1, greedy ? 1:0)
|
718
|
+
if isa(ex,Expr) && is(ex.head,:error)
|
719
|
+
throw(ParseError(ex.args[1]))
|
720
|
+
end
|
721
|
+
if ex == (); throw(ParseError("end of input")); end
|
722
|
+
ex, pos+1 # C is zero-based, Julia is 1-based
|
723
|
+
end
|
724
|
+
|
725
|
+
parse(s::String) = parse(s, 1, true)
|
726
|
+
parse(s::String, pos) = parse(s, pos, true)
|
727
|
+
parseatom(s::String) = parse(s, 1, false)
|
728
|
+
parseatom(s::String, pos) = parse(s, pos, false)
|
729
|
+
|
730
|
+
## miscellaneous string functions ##
|
731
|
+
|
732
|
+
function lpad(s::String, n::Integer, p::String)
|
733
|
+
m = n - strlen(s)
|
734
|
+
if m <= 0; return s; end
|
735
|
+
l = strlen(p)
|
736
|
+
if l==1
|
737
|
+
return p^m * s
|
738
|
+
end
|
739
|
+
q = div(m,l)
|
740
|
+
r = m - q*l
|
741
|
+
cstring(p^q*p[1:chr2ind(p,r)]*s)
|
742
|
+
end
|
743
|
+
|
744
|
+
function rpad(s::String, n::Integer, p::String)
|
745
|
+
m = n - strlen(s)
|
746
|
+
if m <= 0; return s; end
|
747
|
+
l = strlen(p)
|
748
|
+
if l==1
|
749
|
+
return s * p^m
|
750
|
+
end
|
751
|
+
q = div(m,l)
|
752
|
+
r = m - q*l
|
753
|
+
cstring(s*p^q*p[1:chr2ind(p,r)])
|
754
|
+
end
|
755
|
+
|
756
|
+
lpad(s, n::Integer, p) = lpad(string(s), n, string(p))
|
757
|
+
rpad(s, n::Integer, p) = rpad(string(s), n, string(p))
|
758
|
+
|
759
|
+
lpad(s, n::Integer) = lpad(string(s), n, " ")
|
760
|
+
rpad(s, n::Integer) = rpad(string(s), n, " ")
|
761
|
+
|
762
|
+
function split(s::String, delims, include_empty::Bool)
|
763
|
+
i = 1
|
764
|
+
strs = String[]
|
765
|
+
len = length(s)
|
766
|
+
while true
|
767
|
+
tokstart = tokend = i
|
768
|
+
while !done(s,i)
|
769
|
+
(c,i) = next(s,i)
|
770
|
+
if contains(delims, c)
|
771
|
+
break
|
772
|
+
end
|
773
|
+
tokend = i
|
774
|
+
end
|
775
|
+
tok = s[tokstart:(tokend-1)]
|
776
|
+
if include_empty || !isempty(tok)
|
777
|
+
push(strs, tok)
|
778
|
+
end
|
779
|
+
if !((i <= len) || (i==len+1 && tokend!=i))
|
780
|
+
break
|
781
|
+
end
|
782
|
+
end
|
783
|
+
strs
|
784
|
+
end
|
785
|
+
|
786
|
+
split(s::String) = split(s, (' ','\t','\n','\v','\f','\r'), false)
|
787
|
+
split(s::String, x) = split(s, x, true)
|
788
|
+
split(s::String, x::Char, incl::Bool) = split(s, (x,), incl)
|
789
|
+
|
790
|
+
function print_joined(strings, delim, last)
|
791
|
+
i = start(strings)
|
792
|
+
if done(strings,i)
|
793
|
+
return
|
794
|
+
end
|
795
|
+
str, i = next(strings,i)
|
796
|
+
print(str)
|
797
|
+
while !done(strings,i)
|
798
|
+
str, i = next(strings,i)
|
799
|
+
print(done(strings,i) ? last : delim)
|
800
|
+
print(str)
|
801
|
+
end
|
802
|
+
end
|
803
|
+
|
804
|
+
function print_joined(strings, delim)
|
805
|
+
i = start(strings)
|
806
|
+
while !done(strings,i)
|
807
|
+
str, i = next(strings,i)
|
808
|
+
print(str)
|
809
|
+
if !done(strings,i)
|
810
|
+
print(delim)
|
811
|
+
end
|
812
|
+
end
|
813
|
+
end
|
814
|
+
print_joined(strings) = print_joined(strings, "")
|
815
|
+
|
816
|
+
join(args...) = print_to_string(print_joined, args...)
|
817
|
+
|
818
|
+
chop(s::String) = s[1:thisind(s,length(s))-1]
|
819
|
+
chomp(s::String) = (i=thisind(s,length(s)); s[i]=='\n' ? s[1:i-1] : s)
|
820
|
+
chomp(s::ByteString) = s.data[end]==0x0a ? s[1:end-1] : s
|
821
|
+
|
822
|
+
function lstrip(s::String)
|
823
|
+
i = start(s)
|
824
|
+
while !done(s,i)
|
825
|
+
c, j = next(s,i)
|
826
|
+
if !iswspace(c)
|
827
|
+
return s[i:end]
|
828
|
+
end
|
829
|
+
i = j
|
830
|
+
end
|
831
|
+
""
|
832
|
+
end
|
833
|
+
|
834
|
+
function rstrip(s::String)
|
835
|
+
r = reverse(s)
|
836
|
+
i = start(r)
|
837
|
+
while !done(r,i)
|
838
|
+
c, j = next(r,i)
|
839
|
+
if !iswspace(c)
|
840
|
+
return s[1:end-i+1]
|
841
|
+
end
|
842
|
+
i = j
|
843
|
+
end
|
844
|
+
""
|
845
|
+
end
|
846
|
+
|
847
|
+
strip(s::String) = lstrip(rstrip(s))
|
848
|
+
|
849
|
+
## string to integer functions ##
|
850
|
+
|
851
|
+
function parse_int{T<:Integer}(::Type{T}, s::String, base::Integer)
|
852
|
+
if !(2 <= base <= 36); error("invalid base: ",base); end
|
853
|
+
i = start(s)
|
854
|
+
if done(s,i)
|
855
|
+
error("premature end of integer (in ",show_to_string(s),")")
|
856
|
+
end
|
857
|
+
c,i = next(s,i)
|
858
|
+
sgn = one(T)
|
859
|
+
if T <: Signed && c == '-'
|
860
|
+
sgn = -sgn
|
861
|
+
if done(s,i)
|
862
|
+
error("premature end of integer (in ",show_to_string(s),")")
|
863
|
+
end
|
864
|
+
c,i = next(s,i)
|
865
|
+
end
|
866
|
+
base = convert(T,base)
|
867
|
+
n::T = 0
|
868
|
+
while true
|
869
|
+
d = '0' <= c <= '9' ? c-'0' :
|
870
|
+
'A' <= c <= 'Z' ? c-'A'+10 :
|
871
|
+
'a' <= c <= 'z' ? c-'a'+10 : typemax(Int)
|
872
|
+
if d >= base
|
873
|
+
error(show_to_string(c)," is not a valid digit (in ",show_to_string(s),")")
|
874
|
+
end
|
875
|
+
# TODO: overflow detection?
|
876
|
+
n = n*base + d
|
877
|
+
if done(s,i)
|
878
|
+
break
|
879
|
+
end
|
880
|
+
c,i = next(s,i)
|
881
|
+
end
|
882
|
+
return flipsign(n,sgn)
|
883
|
+
end
|
884
|
+
|
885
|
+
parse_int(s::String, base::Integer) = parse_int(Int,s,base)
|
886
|
+
parse_int(T::Type, s::String) = parse_int(T,s,10)
|
887
|
+
parse_int(s::String) = parse_int(Int,s,10)
|
888
|
+
|
889
|
+
parse_bin(T::Type, s::String) = parse_int(T,s,2)
|
890
|
+
parse_oct(T::Type, s::String) = parse_int(T,s,8)
|
891
|
+
parse_hex(T::Type, s::String) = parse_int(T,s,16)
|
892
|
+
|
893
|
+
parse_bin(s::String) = parse_int(Int,s,2)
|
894
|
+
parse_oct(s::String) = parse_int(Int,s,8)
|
895
|
+
parse_hex(s::String) = parse_int(Int,s,16)
|
896
|
+
|
897
|
+
integer (s::String) = int(s)
|
898
|
+
unsigned(s::String) = uint(s)
|
899
|
+
int (s::String) = parse_int(Int,s)
|
900
|
+
uint (s::String) = parse_int(Uint,s)
|
901
|
+
int8 (s::String) = parse_int(Int8,s)
|
902
|
+
uint8 (s::String) = parse_int(Uint8,s)
|
903
|
+
int16 (s::String) = parse_int(Int16,s)
|
904
|
+
uint16 (s::String) = parse_int(Uint16,s)
|
905
|
+
int32 (s::String) = parse_int(Int32,s)
|
906
|
+
uint32 (s::String) = parse_int(Uint32,s)
|
907
|
+
int64 (s::String) = parse_int(Int64,s)
|
908
|
+
uint64 (s::String) = parse_int(Uint64,s)
|
909
|
+
|
910
|
+
## integer to string functions ##
|
911
|
+
|
912
|
+
const _jl_dig_syms = "0123456789abcdefghijklmnopqrstuvwxyz".data
|
913
|
+
|
914
|
+
function int2str(n::Union(Int64,Uint64), b::Integer, l::Int)
|
915
|
+
if b < 2 || b > 36; error("int2str: invalid base ", b); end
|
916
|
+
neg = n < 0
|
917
|
+
n = unsigned(abs(n))
|
918
|
+
b = convert(typeof(n), b)
|
919
|
+
ndig = ndigits(n, b)
|
920
|
+
sz = max(convert(Int, ndig), l) + neg
|
921
|
+
data = Array(Uint8, sz)
|
922
|
+
i = sz
|
923
|
+
if ispow2(b)
|
924
|
+
digmask = b-1
|
925
|
+
shift = trailing_zeros(b)
|
926
|
+
while i > neg
|
927
|
+
ch = n & digmask
|
928
|
+
data[i] = _jl_dig_syms[int(ch)+1]
|
929
|
+
n >>= shift
|
930
|
+
i -= 1
|
931
|
+
end
|
932
|
+
else
|
933
|
+
while i > neg
|
934
|
+
ch = n % b
|
935
|
+
data[i] = _jl_dig_syms[int(ch)+1]
|
936
|
+
n = div(n,b)
|
937
|
+
i -= 1
|
938
|
+
end
|
939
|
+
end
|
940
|
+
if neg
|
941
|
+
data[1] = '-'
|
942
|
+
end
|
943
|
+
ASCIIString(data)
|
944
|
+
end
|
945
|
+
int2str(n::Integer, b::Integer) = int2str(n, b, 0)
|
946
|
+
int2str(n::Integer, b::Integer, l::Int) = int2str(int64(n), b, l)
|
947
|
+
|
948
|
+
string(x::Signed) = dec(int64(x))
|
949
|
+
cstring(x::Signed) = dec(int64(x))
|
950
|
+
|
951
|
+
## string to float functions ##
|
952
|
+
|
953
|
+
function float64_isvalid(s::String, out::Array{Float64,1})
|
954
|
+
s = cstring(s)
|
955
|
+
return (ccall(:jl_strtod, Int32, (Ptr{Uint8},Ptr{Float64}), s, out)==0)
|
956
|
+
end
|
957
|
+
|
958
|
+
function float32_isvalid(s::String, out::Array{Float32,1})
|
959
|
+
s = cstring(s)
|
960
|
+
return (ccall(:jl_strtof, Int32, (Ptr{Uint8},Ptr{Float32}), s, out)==0)
|
961
|
+
end
|
962
|
+
|
963
|
+
begin
|
964
|
+
local tmp::Array{Float64,1} = Array(Float64,1)
|
965
|
+
local tmpf::Array{Float32,1} = Array(Float32,1)
|
966
|
+
global float64, float32
|
967
|
+
function float64(s::String)
|
968
|
+
if !float64_isvalid(s, tmp)
|
969
|
+
throw(ArgumentError("float64(String): invalid number format"))
|
970
|
+
end
|
971
|
+
return tmp[1]
|
972
|
+
end
|
973
|
+
|
974
|
+
function float32(s::String)
|
975
|
+
if !float32_isvalid(s, tmpf)
|
976
|
+
throw(ArgumentError("float32(String): invalid number format"))
|
977
|
+
end
|
978
|
+
return tmpf[1]
|
979
|
+
end
|
980
|
+
end
|
981
|
+
|
982
|
+
float(x::String) = float64(x)
|
983
|
+
parse_float(x::String) = float64(x)
|
984
|
+
parse_float(::Type{Float64}, x::String) = float64(x)
|
985
|
+
parse_float(::Type{Float32}, x::String) = float32(x)
|
986
|
+
|
987
|
+
# copying a byte string (generally not needed due to "immutability")
|
988
|
+
|
989
|
+
strcpy{T<:ByteString}(s::T) = T(copy(s.data))
|
990
|
+
|
991
|
+
# lexicographically compare byte arrays (used by Latin-1 and UTF-8)
|
992
|
+
|
993
|
+
function lexcmp(a::Array{Uint8,1}, b::Array{Uint8,1})
|
994
|
+
c = ccall(:memcmp, Int32, (Ptr{Uint8}, Ptr{Uint8}, Uint),
|
995
|
+
a, b, min(length(a),length(b)))
|
996
|
+
c < 0 ? -1 : c > 0 ? +1 : cmp(length(a),length(b))
|
997
|
+
end
|
998
|
+
|
999
|
+
# find the index of the first occurrence of a byte value in a byte array
|
1000
|
+
|
1001
|
+
function memchr(a::Array{Uint8,1}, b::Integer)
|
1002
|
+
p = pointer(a)
|
1003
|
+
q = ccall(:memchr, Ptr{Uint8}, (Ptr{Uint8}, Int32, Uint), p, b, length(a))
|
1004
|
+
q == C_NULL ? 0 : q - p + 1
|
1005
|
+
end
|
1006
|
+
|
1007
|
+
# concatenate byte arrays into a single array
|
1008
|
+
|
1009
|
+
memcat() = Array(Uint8,0)
|
1010
|
+
memcat(a::Array{Uint8,1}) = copy(a)
|
1011
|
+
|
1012
|
+
function memcat(arrays::Array{Uint8,1}...)
|
1013
|
+
n = 0
|
1014
|
+
for a in arrays
|
1015
|
+
n += length(a)
|
1016
|
+
end
|
1017
|
+
arr = Array(Uint8, n)
|
1018
|
+
ptr = pointer(arr)
|
1019
|
+
offset = 0
|
1020
|
+
for a in arrays
|
1021
|
+
ccall(:memcpy, Ptr{Uint8}, (Ptr{Uint8}, Ptr{Uint8}, Uint),
|
1022
|
+
ptr+offset, a, length(a))
|
1023
|
+
offset += length(a)
|
1024
|
+
end
|
1025
|
+
return arr
|
1026
|
+
end
|
1027
|
+
|
1028
|
+
# concatenate the data fields of byte strings
|
1029
|
+
|
1030
|
+
memcat(s::ByteString) = memcat(s.data)
|
1031
|
+
memcat(sx::ByteString...) = memcat(map(s->s.data, sx)...)
|