dataMetaParse 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.yardopts +1 -0
- data/History.md +5 -0
- data/PostInstall.txt +2 -0
- data/README.md +53 -0
- data/Rakefile +13 -0
- data/lib/dataMetaParse.rb +58 -0
- data/lib/dataMetaParse/basic.treetop +236 -0
- data/lib/dataMetaParse/uri.treetop +229 -0
- data/lib/dataMetaParse/uriDataMeta.rb +139 -0
- data/lib/dataMetaParse/urlDataMeta.treetop +110 -0
- data/test/numbers.treetop +23 -0
- data/test/test_helper.rb +6 -0
- data/test/test_numbers.rb +52 -0
- data/test/test_uriParser.rb +162 -0
- data/test/utils.rb +12 -0
- metadata +83 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 7a0e72fa6c8ea304157a65156fb66f908b74bef4
|
4
|
+
data.tar.gz: 0cb917be27d2ca056bc345e277e3377dcbdd1334
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 4aa00db56ca8992e43119ac430759bc0429390b8c3dc6f1f4ecfc0000eebd87e711ac5ea4852472f10d15e50868ec0a9d97c320aa6f8c50325967e0f4b4dfeff
|
7
|
+
data.tar.gz: d94b08fa48f84f458d8bc18c5e5ccc9feebba35c6e62caaf58070c9188157fe204285b906271d5ea6b1e9db874c2ccdc3e04b1948db0c5075bc6d833332d369a
|
data/.yardopts
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--title "DataMeta Parsing Utils" -r README.md --charset UTF-8 lib/**/*rb - README.md
|
data/History.md
ADDED
data/PostInstall.txt
ADDED
data/README.md
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
# dataMetaParse
|
2
|
+
|
3
|
+
DataMeta Parser commons: common rules and some reusable grammars
|
4
|
+
|
5
|
+
References for this gem:
|
6
|
+
|
7
|
+
* [Source](https://github.com/eBayDataMeta/DataMeta-gems)
|
8
|
+
|
9
|
+
## DESCRIPTION:
|
10
|
+
|
11
|
+
See the [DataMeta Project](https://github.com/eBayDataMeta/DataMeta)
|
12
|
+
|
13
|
+
## FEATURES/PROBLEMS:
|
14
|
+
|
15
|
+
* This gem uses [treetop](http://treetop.rubyforge.org) for grammar processing which only works with
|
16
|
+
[PEGs](http://en.wikipedia.org/wiki/Parsing_expression_grammar), same as [Antlr](http://www.antlr.org) and many other
|
17
|
+
popular grammar processors. Hence, be careful with features that
|
18
|
+
[PEGs](http://en.wikipedia.org/wiki/Parsing_expression_grammar) do not support,
|
19
|
+
like [left recursion](http://en.wikipedia.org/wiki/Left_recursion).
|
20
|
+
|
21
|
+
### DataMeta URI parsing
|
22
|
+
|
23
|
+
This gem provides a convenient class for URI parsing with DataMeta specifics.
|
24
|
+
|
25
|
+
The URI format is [typical](http://support.microsoft.com/kb/135975 "URL Format - MS Knowledge Base"):
|
26
|
+
|
27
|
+
protocol://user:password@server:port/path?query
|
28
|
+
|
29
|
+
Out of which,
|
30
|
+
|
31
|
+
* `protocol`: required, corresponds with DataMeta "platform", can be:
|
32
|
+
* `oracle` - for Oracle connections
|
33
|
+
* `mysql` - for MySQL connections
|
34
|
+
* `user`: optional, the user name for authentication
|
35
|
+
* `password`: password for the user, can be only used in conjunction with the `user`. Depending on a protocol,
|
36
|
+
can be either required or optional.
|
37
|
+
* `server`: required, host name or IP address
|
38
|
+
* `port`: optional, port number to connect to
|
39
|
+
* `path`: optional, protocol specific, may refer either to a full path on the server's filesystem or a name of the database
|
40
|
+
* `?query`: optional, regular format for the URL query, in `key=value` format separated by <tt>&</tt>, any special
|
41
|
+
characters encoded in the <tt>%xx</tt> format.
|
42
|
+
|
43
|
+
## SYNOPSIS:
|
44
|
+
|
45
|
+
* No command line runnables in this gem, it is a library only.
|
46
|
+
|
47
|
+
## INSTALL:
|
48
|
+
|
49
|
+
gem install dataMetaParse
|
50
|
+
|
51
|
+
## LICENSE:
|
52
|
+
|
53
|
+
[Apache v 2.0](https://github.com/eBayDataMeta/DataMeta/blob/master/LICENSE.md)
|
data/Rakefile
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
%w(yard rdoc/task rake/testtask fileutils ./lib/dataMetaParse).each{ |r| require r}
|
2
|
+
|
3
|
+
Rake::TestTask.new do |t|
|
4
|
+
t.libs << 'test'
|
5
|
+
end
|
6
|
+
|
7
|
+
desc 'Regen RDocs'
|
8
|
+
task :default => :docs
|
9
|
+
|
10
|
+
YARD::Rake::YardocTask.new('docs') {|r|
|
11
|
+
r.stats_options = ['--list-undoc']
|
12
|
+
}
|
13
|
+
|
@@ -0,0 +1,58 @@
|
|
1
|
+
$:.unshift(File.dirname(__FILE__)) unless $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
|
2
|
+
|
3
|
+
require 'treetop'
|
4
|
+
=begin rdoc
Grammar parsing commons for the dataMeta Project.

This gem is a library only; see the README.md file for an overview.
=end
module DataMetaParse
  # Current version
  VERSION = '1.0.0'

=begin rdoc
Parsing error: a RuntimeError whose message reports where the parsing failed,
quoting the offending source line with a pointer to the failure column underneath it.
=end
  class Err < RuntimeError
    attr_reader :source, :parser

=begin rdoc
Constructor, also builds the error message passed to the super.

@param [String] source the text that was being parsed when the error occurred
@param [Object] parser Treetop compiled parser whichever class it is. It may be +Treetop::Runtime::CompiledParser+.
    The parser's +failure_reason+, +index+, +failure_line+ and +failure_column+ are read here.
=end
    def initialize(source, parser)
      @source, @parser = source, parser
      # Keep only the "Expected ..." part of the failure reason; the reason may be absent altogether.
      reason = parser.failure_reason.to_s[/^(Expected .+) after/m, 1] || 'REASONLESS'
      # Drop the line terminator, otherwise the pointer line below is pushed away from the line it points into.
      offender = source.lines.to_a[parser.failure_line - 1].to_s.chomp
      pointer = "#{'~' * [parser.failure_column - 1, 0].max}^"
      # replace newlines with <EOL> to make them stand out
      super "ERROR at index #{parser.index}\n#{reason.gsub("\n", '<EOL>')}:\n#{offender}\n#{pointer}\n"
    end
  end

=begin rdoc
Loads the base rules from +dataMetaParse/basic.treetop+
=end
  def loadBaseRulz
    Treetop.load("#{File.dirname(__FILE__)}/dataMetaParse/basic")
  end

=begin rdoc
Parse with error handling, convenience shortcut to the content of this method.

@param [Object] parser Treetop compiled parser whichever class it is. It may be +Treetop::Runtime::CompiledParser+
@param [String] source the data to parse with the given parser
@return [Object] the AST, likely a +Treetop::Runtime::SyntaxNode+, if the parsing was successful; otherwise an {Err}
    instance (returned, not raised) when the parser supplies a failure reason, or +nil+ when it does not.
    One way to get a +nil+ is to cause a mismatch in the very first token.
=end
  def parse(parser, source)
    parser.parse(source) || (parser.failure_reason ? Err.new(source, parser) : nil)
  end

  module_function :loadBaseRulz, :parse
end
|
@@ -0,0 +1,236 @@
|
|
1
|
+
grammar DataMetaCommonsRoot
|
2
|
+
|
3
|
+
# Some staple rules
|
4
|
+
# w in the name means “whitespace”, e means End of Line, and capitalization means that the
|
5
|
+
# capitalized part is required (contrary to being optional).
|
6
|
+
|
7
|
+
|
8
|
+
# Required whitespace
|
9
|
+
rule W
|
10
|
+
[\s]+
|
11
|
+
# [ \t]+
|
12
|
+
end
|
13
|
+
|
14
|
+
rule notBlank
|
15
|
+
(!W .)
|
16
|
+
end
|
17
|
+
|
18
|
+
rule notBlanks
|
19
|
+
notBlank+
|
20
|
+
end
|
21
|
+
|
22
|
+
rule E
|
23
|
+
"\n"
|
24
|
+
end
|
25
|
+
|
26
|
+
# to the End Of Line inclusively
|
27
|
+
rule toE
|
28
|
+
tillE? E
|
29
|
+
end
|
30
|
+
|
31
|
+
# to the End Of Line exclusively
|
32
|
+
rule tillE
|
33
|
+
notE+
|
34
|
+
end
|
35
|
+
|
36
|
+
rule notE
|
37
|
+
(!E .)
|
38
|
+
end
|
39
|
+
|
40
|
+
# Optional whitespace
|
41
|
+
rule we
|
42
|
+
wE?
|
43
|
+
end
|
44
|
+
|
45
|
+
# Optional whitespace with required EOL
|
46
|
+
rule wE
|
47
|
+
( ( W "\n" / w meshLineComment / w "\n" ) w meshLineComment? )+
|
48
|
+
end
|
49
|
+
|
50
|
+
# Optional whitespace
|
51
|
+
rule w
|
52
|
+
W?
|
53
|
+
end
|
54
|
+
|
55
|
+
# Comment used in scripting languages like Bash, Ruby etc.
|
56
|
+
rule meshLineComment
|
57
|
+
'#' (!"\n" .)* "\n"
|
58
|
+
end
|
59
|
+
|
60
|
+
# /* */ comment used in C, Java etc, multiline
|
61
|
+
rule slashStarComment
|
62
|
+
'/*'
|
63
|
+
(
|
64
|
+
!'*/'
|
65
|
+
(. / "\n")
|
66
|
+
)*
|
67
|
+
'*/'
|
68
|
+
end
|
69
|
+
|
70
|
+
# Uppercase A to Z
|
71
|
+
rule AZ
|
72
|
+
[A-Z]+
|
73
|
+
end
|
74
|
+
|
75
|
+
# Lowercase A to Z
|
76
|
+
rule az
|
77
|
+
[a-z]+
|
78
|
+
end
|
79
|
+
|
80
|
+
# Alphabetical
|
81
|
+
rule alpha
|
82
|
+
[a-zA-Z]+
|
83
|
+
end
|
84
|
+
|
85
|
+
# Decimal digit
|
86
|
+
rule digit
|
87
|
+
[0-9]
|
88
|
+
end
|
89
|
+
|
90
|
+
rule dot
|
91
|
+
'.'
|
92
|
+
end
|
93
|
+
|
94
|
+
rule plus
|
95
|
+
'+'
|
96
|
+
end
|
97
|
+
|
98
|
+
rule minus
|
99
|
+
'-'
|
100
|
+
end
|
101
|
+
|
102
|
+
rule sign
|
103
|
+
plus / minus
|
104
|
+
end
|
105
|
+
|
106
|
+
rule decIntNoSign
|
107
|
+
digit+
|
108
|
+
end
|
109
|
+
|
110
|
+
rule decIntSignable
|
111
|
+
sign? decIntNoSign
|
112
|
+
end
|
113
|
+
|
114
|
+
rule decFraction
|
115
|
+
dot decIntNoSign
|
116
|
+
end
|
117
|
+
|
118
|
+
rule signDotDecFrac
|
119
|
+
sign? dot decIntNoSign
|
120
|
+
end
|
121
|
+
|
122
|
+
rule decIntDotFrac
|
123
|
+
decIntSignable dot
|
124
|
+
end
|
125
|
+
|
126
|
+
rule fullDecFrac
|
127
|
+
decIntSignable decFraction
|
128
|
+
end
|
129
|
+
|
130
|
+
rule fixedDecimal
|
131
|
+
fullDecFrac /signDotDecFrac / decIntDotFrac
|
132
|
+
end
|
133
|
+
|
134
|
+
# Hexadecimal digit
|
135
|
+
rule hexDigit
|
136
|
+
[0-9A-Fa-f]
|
137
|
+
end
|
138
|
+
|
139
|
+
# Alphanumeric
|
140
|
+
rule alphaNum
|
141
|
+
[0-9A-Za-z]
|
142
|
+
end
|
143
|
+
|
144
|
+
# dataMeta "Word" character, can be a part of an identifier name.
|
145
|
+
rule wordChar
|
146
|
+
[0-9A-Za-z_]
|
147
|
+
end
|
148
|
+
|
149
|
+
rule dmWord
|
150
|
+
wordChar+
|
151
|
+
end
|
152
|
+
|
153
|
+
# Class name: starts with uppercase letter(s), followed by any number of word characters.
# The tail must be repeated (*), not merely optional (?), otherwise only one
# character after the uppercase run would ever be consumed.
rule className
    AZ wordChar*
end
|
157
|
+
|
158
|
+
# Variable name: starts with an underscore or a lowercase letter, followed by any number of word characters.
rule varName
    [_a-z] wordChar* # the tail may be empty, which allows a single underscore as a var name.
end
|
162
|
+
|
163
|
+
# C-style comment to reuse everywhere
|
164
|
+
rule c_comment
|
165
|
+
'/*'
|
166
|
+
(
|
167
|
+
!'*/'
|
168
|
+
(. / "\n")
|
169
|
+
)*
|
170
|
+
'*/'
|
171
|
+
end
|
172
|
+
|
173
|
+
# C-Style whitespace
|
174
|
+
rule c_whitespace
|
175
|
+
c_comment / W
|
176
|
+
end
|
177
|
+
|
178
|
+
# C-Style End of Line comment
|
179
|
+
rule cEolComment
|
180
|
+
'//' (!"\n" .)* "\n"
|
181
|
+
end
|
182
|
+
|
183
|
+
rule string
|
184
|
+
'"' letters:( !'"' stringLetter )* '"' {
|
185
|
+
def fetch
|
186
|
+
letters.elements.map { |el| el.elements.last.fetch }.join
|
187
|
+
end
|
188
|
+
}
|
189
|
+
end
|
190
|
+
|
191
|
+
# One character of a string literal body: either a backslash escape
# (\" \\ \n \r \t) or any other single character taken verbatim.
rule stringLetter
    '\\' char:[\\"ntr] {
        # Translates the escape sequence into the character it stands for.
        def fetch
            case char.text_value
                when '"'; '"'
                when 'n'; "\n"
                when 'r'; 13.chr
                when 't'; 9.chr
                when '\\'; "\\"
                else
                    raise ArgumentError, "Invalid string escape '#{char.text_value}'"
            end
        end
    }
    /
    . {
        # A plain character stands for itself.
        def fetch
            text_value
        end
    }
end
|
212
|
+
|
213
|
+
rule sn
|
214
|
+
sN?
|
215
|
+
end
|
216
|
+
|
217
|
+
# borrowed from: http://whitequark.org/blog/2011/09/08/treetop-typical-errors
|
218
|
+
# C End of line
|
219
|
+
rule cEol
|
220
|
+
( ( S "\n" / s cEolComment / s "\n" ) s cEolComment? )+
|
221
|
+
end
|
222
|
+
|
223
|
+
# Scripting EOL, with mesh comment
|
224
|
+
rule sEol
|
225
|
+
( ( S "\n" / s meshLineComment / s "\n" ) s meshLineComment? )+
|
226
|
+
end
|
227
|
+
|
228
|
+
rule s
|
229
|
+
S?
|
230
|
+
end
|
231
|
+
|
232
|
+
rule S
|
233
|
+
[ \t]+
|
234
|
+
end
|
235
|
+
|
236
|
+
end
|
@@ -0,0 +1,229 @@
|
|
1
|
+
# Borrowed from: https://github.com/juretta/uri-templates/blob/master/grammar/uri_template.treetop
|
2
|
+
|
3
|
+
grammar UriTemplate
|
4
|
+
|
5
|
+
include DataMetaCommonsRoot
|
6
|
+
|
7
|
+
rule uri_template
|
8
|
+
uri_element more_elements:(uri_element)* {
|
9
|
+
def value(env={})
|
10
|
+
uri_element.value(env) << more_elements.elements.map{|el| el.value(env)}.join
|
11
|
+
end
|
12
|
+
}
|
13
|
+
end
|
14
|
+
|
15
|
+
rule uri_element
|
16
|
+
expansion / uri_part
|
17
|
+
end
|
18
|
+
|
19
|
+
rule expansion
|
20
|
+
'{'
|
21
|
+
c:(
|
22
|
+
var
|
23
|
+
/
|
24
|
+
operator
|
25
|
+
)
|
26
|
+
'}'
|
27
|
+
{
|
28
|
+
def value(env = {})
|
29
|
+
c.value(env)
|
30
|
+
end
|
31
|
+
}
|
32
|
+
end
|
33
|
+
|
34
|
+
rule uri_part
|
35
|
+
(unreserved / reserved / pct_encoded) {
|
36
|
+
def value(env = {})
|
37
|
+
text_value
|
38
|
+
end
|
39
|
+
}
|
40
|
+
end
|
41
|
+
|
42
|
+
rule arg
|
43
|
+
(reserved / unreserved / pct_encoded)*
|
44
|
+
end
|
45
|
+
|
46
|
+
rule op
|
47
|
+
(
|
48
|
+
'opt' {
|
49
|
+
# If each variable is undefined or an empty list then substitute the
|
50
|
+
# empty string, otherwise substitute the value of 'arg'.
|
51
|
+
def exec
|
52
|
+
lambda do |env, arg, vars|
|
53
|
+
ret = ''
|
54
|
+
vars.split(',').each do |var|
|
55
|
+
if env[var] && (env[var].respond_to?(:length) ? env[var].length > 0 : true)
|
56
|
+
ret = "#{arg}"
|
57
|
+
break
|
58
|
+
end
|
59
|
+
end
|
60
|
+
ret
|
61
|
+
end
|
62
|
+
end
|
63
|
+
}
|
64
|
+
/
|
65
|
+
'neg' {
|
66
|
+
# If all of the variables are un-defined or empty then substitute the
|
67
|
+
# value of arg, otherwise substitute the empty string.
|
68
|
+
def exec
|
69
|
+
lambda do |env, arg, vars|
|
70
|
+
ret = "#{arg}"
|
71
|
+
vars.split(',').each do |var|
|
72
|
+
if !env[var].to_s.blank?
|
73
|
+
ret = ""
|
74
|
+
break
|
75
|
+
end
|
76
|
+
end
|
77
|
+
ret
|
78
|
+
end
|
79
|
+
end
|
80
|
+
}
|
81
|
+
/
|
82
|
+
'prefix' {
|
83
|
+
# The prefix operator MUST only have one variable in its expansion. If
|
84
|
+
# the variable is defined and non-empty then substitute the value of
|
85
|
+
# arg followed by the value of the variable, otherwise substitute the
|
86
|
+
# empty string.
|
87
|
+
def exec
|
88
|
+
lambda do |env, prefix, vars|
|
89
|
+
v = env[vars]
|
90
|
+
if vars =~ /([^=]+)=([^=]+)/
|
91
|
+
var, default = $1.dup, $2.dup
|
92
|
+
v = env[var]
|
93
|
+
v = default if v.to_s.blank?
|
94
|
+
end
|
95
|
+
!v.blank? ? "#{prefix}#{UriTemplate::Encoder.encode(v)}" : ""
|
96
|
+
end
|
97
|
+
end
|
98
|
+
}
|
99
|
+
/
|
100
|
+
'suffix' {
|
101
|
+
# The suffix operator MUST only have one variable in its expansion. If
|
102
|
+
# the variable is defined and non-empty then substitute the value of
|
103
|
+
# the variable followed by the value of arg, otherwise substitute the
|
104
|
+
# empty string.
|
105
|
+
def exec
|
106
|
+
lambda do |env, append, vars|
|
107
|
+
v = env[vars]
|
108
|
+
if vars =~ /([^=]+)=([^=]+)/
|
109
|
+
var, default = $1.dup, $2.dup
|
110
|
+
v = env[var]
|
111
|
+
v = default if v.to_s.blank?
|
112
|
+
end
|
113
|
+
if v
|
114
|
+
val = UriTemplate::Encoder.encode(v)
|
115
|
+
!val.blank? ? "#{val}#{append}" : ""
|
116
|
+
else
|
117
|
+
''
|
118
|
+
end
|
119
|
+
end
|
120
|
+
end
|
121
|
+
}
|
122
|
+
/
|
123
|
+
'join' {
|
124
|
+
# For each variable that is defined and non-empty create a keyvalue
|
125
|
+
# string that is the concatenation of the variable name, "=", and the
|
126
|
+
# variable value. Concatenate more than one keyvalue string with
|
127
|
+
# intervening values of arg to create the substitution value.
|
128
|
+
def exec
|
129
|
+
lambda do |env, joinop, vars|
|
130
|
+
vars.split(',').map do |var|
|
131
|
+
v = env[var]
|
132
|
+
if var =~ /([^=]+)=([^=]+)/
|
133
|
+
var, default = $1.dup, $2.dup
|
134
|
+
v = env[var]
|
135
|
+
v = default if v.to_s.blank?
|
136
|
+
end
|
137
|
+
"#{var}=#{UriTemplate::Encoder.encode(v)}" if v
|
138
|
+
end.compact.join(joinop)
|
139
|
+
end
|
140
|
+
end
|
141
|
+
}
|
142
|
+
/
|
143
|
+
'list' {
|
144
|
+
# The listjoin operator MUST have only one variable in its expansion
|
145
|
+
# and that variable must be a list. More than one variable is an
|
146
|
+
# error. If the list is non-empty then substitute the concatenation of
|
147
|
+
# all the list members with intervening values of arg. If the list is
|
148
|
+
# empty or the variable is undefined then substitute the empty string.
|
149
|
+
def exec
|
150
|
+
lambda do |env, joinop, vars|
|
151
|
+
return "" unless env[vars].respond_to? :each
|
152
|
+
env[vars].map do |v|
|
153
|
+
"#{UriTemplate::Encoder.encode(v)}" if v
|
154
|
+
end.compact.join(joinop)
|
155
|
+
end
|
156
|
+
end
|
157
|
+
}
|
158
|
+
)
|
159
|
+
end
|
160
|
+
|
161
|
+
rule vars
|
162
|
+
var ("," var)*
|
163
|
+
end
|
164
|
+
|
165
|
+
rule vardefault
|
166
|
+
(unreserved / pct_encoded)*
|
167
|
+
end
|
168
|
+
|
169
|
+
rule var
|
170
|
+
varname defaults:('=' vardefault)* {
|
171
|
+
def value(env={} )
|
172
|
+
return UriTemplate::Encoder.encode(env[name]) unless env[name].nil?
|
173
|
+
defaults.text_value.gsub(/=/, '')
|
174
|
+
end
|
175
|
+
|
176
|
+
def name
|
177
|
+
varname.text_value
|
178
|
+
end
|
179
|
+
}
|
180
|
+
end
|
181
|
+
|
182
|
+
rule operator
|
183
|
+
"-" op "|" arg "|" vars {
|
184
|
+
def value(env={})
|
185
|
+
op.exec.call(env, arg.text_value, vars.text_value) # if op.respond_to?(:exec)
|
186
|
+
end
|
187
|
+
}
|
188
|
+
end
|
189
|
+
|
190
|
+
rule varname
|
191
|
+
[a-zA-Z0-9] [a-zA-Z0-9_.-]*
|
192
|
+
end
|
193
|
+
|
194
|
+
rule alpha
|
195
|
+
[A-Za-z_]
|
196
|
+
end
|
197
|
+
|
198
|
+
rule alphanumeric
|
199
|
+
alpha / [0-9]
|
200
|
+
end
|
201
|
+
|
202
|
+
# see http://www.ietf.org/rfc/rfc3986.txt
|
203
|
+
rule unreserved
|
204
|
+
alphanumeric / "-" / "." / "_" / "~"
|
205
|
+
end
|
206
|
+
|
207
|
+
# see http://www.ietf.org/rfc/rfc3986.txt
|
208
|
+
rule pct_encoded
|
209
|
+
'%' hexdig hexdig
|
210
|
+
end
|
211
|
+
|
212
|
+
rule hexdig
|
213
|
+
[a-fA-F0-9]
|
214
|
+
end
|
215
|
+
|
216
|
+
# see http://www.ietf.org/rfc/rfc3986.txt
|
217
|
+
rule reserved
|
218
|
+
gen_delims / sub_delims
|
219
|
+
end
|
220
|
+
|
221
|
+
rule gen_delims
|
222
|
+
":" / "/" / "?" / "#" / "[" / "]" / "@"
|
223
|
+
end
|
224
|
+
|
225
|
+
rule sub_delims
|
226
|
+
"!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
|
227
|
+
end
|
228
|
+
|
229
|
+
end
|
@@ -0,0 +1,139 @@
|
|
1
|
+
$:.unshift(File.dirname(__FILE__)) unless $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
|
2
|
+
|
3
|
+
require 'uri'
|
4
|
+
require 'treetop'
|
5
|
+
|
6
|
+
module DataMetaParse
=begin rdoc
DataMeta URI with all the parts.

The user story:

* DataMeta URIs are used in DataMeta Scripts to specify all aspects of a data set identity and location.
* For physical access, a URI may be disassembled using this grammar and parser, the parts obtained so may be used
  to access concrete physical resources.

@!attribute [r] proto
    @return [String] the protocol part, such as +http+, +ftp+, +socparc+ etc

@!attribute [r] user
    @return [String] the user id part of the URI, can be +nil+ and for some URIs may be in properties

@!attribute [r] pwd
    @return [String] the password part of the URI, can be +nil+ and for some URIs may be in properties

@!attribute [r] host
    @return [String] the host part of the URI

@!attribute [r] port
    @return [Integer] the port number specified in the URI, can be +nil+

@!attribute [r] path
    @return [String] for the +file+ protocol, path as specified, full or relative. For any other URI, the part between
        the closing '/' after the +host:port+ part and the query part starting with '?'.
        This means, for all other protocols except +file+, the path part will never have an initial slash.

@!attribute [r] props
    @return [Hash] hash of properties keyed by the property name and pointing to a value if any

=end
  class Uri
    attr_reader :proto, :user, :pwd, :host, :port, :path, :props

=begin rdoc
Creates an instance of the object.

@param [String] proto see the property {#proto}
@param [String] user see the property {#user}
@param [String] pwd see the property {#pwd}
@param [String] host see the property {#host}
@param [Integer] port see the property {#port}
@param [String] path see the property {#path}
@param [Hash] props see the property {#props}
@raise [ArgumentError] if a password is given without a user, or if anything but the path is given
    for the +file+ protocol
=end
    def initialize(proto, user, pwd, host, port, path, props)
      raise ArgumentError, 'Password specified but user not' if !user && pwd
      raise ArgumentError, 'For file protocol, only path can be specified' if proto == 'file' && (
          user || pwd || host || port || !props.empty?)

      @proto, @user, @pwd, @host, @port, @path, @props = proto, user, pwd, host, port, path, props
    end

=begin rdoc
Equality to the other: +true+ only for another {Uri} with all the parts equal.
Anything else, including +nil+, is simply not equal instead of causing a +NoMethodError+.
=end
    def ==(other)
      other.is_a?(Uri) &&
          @proto == other.proto && @user == other.user && @pwd == other.pwd && @host == other.host &&
          @port == other.port && @path == other.path && @props.eql?(other.props)
    end

=begin rdoc
Same as the {#==}
=end
    def eql?(other); self == other end

=begin rdoc
Hash code consistent with {#eql?}, so that equal instances behave as the same +Hash+ key.
=end
    def hash
      [@proto, @user, @pwd, @host, @port, @path, @props].hash
    end

=begin rdoc
Loads the grammar - has to be done only once per Ruby VM start. Relies on loading the basics.
=end
    def self.loadRulz
      Treetop.load(File.join(File.dirname(__FILE__), 'urlDataMeta'))
    end

=begin rdoc
Instance to textual.
=end
    def to_s
      return "file://#{@path}" if @proto == 'file'

      result = "#{@proto}://"
      result << URI.encode_www_form_component(@user) if @user
      result << ':' << URI.encode_www_form_component(@pwd) if @user && @pwd
      result << '@' if @user
      result << @host
      result << ':' << @port.to_s if @port
      # the slash separates the host part from the path and/or from the query
      result << '/' if @path || !@props.empty?
      result << @path if @path

      unless @props.empty?
        result << '?' << @props.keys.map { |k|
          v = @props[k]; v ? "#{k}=#{URI.encode_www_form_component(v)}" : "#{k}"
        }.join('&')
      end

      result
    end

=begin rdoc
Parses the source into the instance of the object.

@param [String] source the source, the URI specification to parse into the instance of this class
@return [Uri] the parsed instance or +nil+ if the source does not match the URI grammar
=end
    def self.parse(source)
      fileSignature = 'file://'
      if source.start_with?(fileSignature)
        # for the file protocol, everything past the signature is the path, no grammar involved
        return Uri.new('file', nil, nil, nil, nil, source[fileSignature.length..-1], {})
      end

      ast = DataMetaUrlParser.new.parse(source)
      return nil unless ast

      proto = ast.proto.text_value
      user = ast.user? ? ast.userPwd.user.text_value : nil
      pwd = ast.pwd? ? URI.decode_www_form_component(ast.userPwd.password) : nil
      host = ast.host.text_value
      port = ast.port? ? ast.port.number : nil
      path = ast.path? ? ast.uTail.path : nil
      query = ast.query? ? ast.uTail.query : nil

      props = {}
      query.to_s.split('&').each { |pairString|
        next if pairString.empty? # a stray '&', nothing to record
        # split on the first '=' only, the value itself may contain more of those
        key, val = pairString.split('=', 2)
        # the value is absent in case of &paramA&paramB=b, in which case paramA will be nil
        props[key] = val.nil? || val.empty? ? nil : URI.decode_www_form_component(val)
      }
      Uri.new(proto, user, pwd, host, port, path, props)
    end

  end
end
|
@@ -0,0 +1,110 @@
|
|
1
|
+
grammar DataMetaUrl
|
2
|
+
|
3
|
+
# DataMeta URL definition, a part of a URL definition
|
4
|
+
|
5
|
+
include DataMetaCommonsRoot
|
6
|
+
|
7
|
+
rule dataMetaUri
|
8
|
+
proto:urlProtocol '://' userPwd:userSpec? host:hostName port:portSpec? uTail:urlTail? {
|
9
|
+
def user?
|
10
|
+
!userPwd.text_value.empty? && userPwd.name && !userPwd.name.empty?
|
11
|
+
end
|
12
|
+
def pwd?
|
13
|
+
!userPwd.text_value.empty? && userPwd.password && !userPwd.password.empty?
|
14
|
+
end
|
15
|
+
def port?
|
16
|
+
!port.text_value.empty?
|
17
|
+
end
|
18
|
+
|
19
|
+
def tail?
|
20
|
+
!uTail.text_value.empty?
|
21
|
+
end
|
22
|
+
|
23
|
+
def path?
|
24
|
+
!uTail.text_value.empty? && uTail.path
|
25
|
+
end
|
26
|
+
def path
|
27
|
+
uTail.path
|
28
|
+
end
|
29
|
+
def query?
|
30
|
+
!uTail.text_value.empty? && uTail.query
|
31
|
+
end
|
32
|
+
def query
|
33
|
+
uTail.query
|
34
|
+
end
|
35
|
+
}
|
36
|
+
end
|
37
|
+
|
38
|
+
# Order is important, if you put "http" in front of the "https", https match will fail
|
39
|
+
rule urlProtocol
|
40
|
+
'https' / 'http' / 'ftp' / 'hdfs' / 'mysql' / 'oracle'
|
41
|
+
end
|
42
|
+
|
43
|
+
rule urlTail
|
44
|
+
'/' uPath:urlPath? uQuery:urlQuery? {
|
45
|
+
def path
|
46
|
+
uPath.text_value.empty? ? nil : uPath.text_value
|
47
|
+
end
|
48
|
+
def query
|
49
|
+
uQuery.text_value.empty? ? nil : uQuery.query
|
50
|
+
end
|
51
|
+
}
|
52
|
+
end
|
53
|
+
|
54
|
+
rule urlPath
|
55
|
+
(urlPathChar+)
|
56
|
+
end
|
57
|
+
|
58
|
+
rule urlPathChar
|
59
|
+
wordChar / '-' / '/' / '.'
|
60
|
+
#!'?' # - this causes infinite loop
|
61
|
+
end
|
62
|
+
|
63
|
+
rule urlQuery
|
64
|
+
'?' queryText:(notBlanks+) {
|
65
|
+
def query
|
66
|
+
queryText.text_value.empty? ? nil : queryText.text_value
|
67
|
+
end
|
68
|
+
}
|
69
|
+
end
|
70
|
+
|
71
|
+
rule hostChar
|
72
|
+
wordChar / '-' / '.'
|
73
|
+
end
|
74
|
+
|
75
|
+
rule hostName
|
76
|
+
hostChar+
|
77
|
+
end
|
78
|
+
|
79
|
+
rule portSpec
|
80
|
+
':' portNumber:(digit+) {
|
81
|
+
def number # port number or nil if none, can not default it here because it depends on the protocol
|
82
|
+
text_value.empty? ? nil : portNumber.text_value.to_i
|
83
|
+
end
|
84
|
+
}
|
85
|
+
end
|
86
|
+
|
87
|
+
rule notAtSymbol
|
88
|
+
!'@' .
|
89
|
+
end
|
90
|
+
|
91
|
+
rule pwdSpec
|
92
|
+
':' pwd:(notAtSymbol+) {
|
93
|
+
def empty?
|
94
|
+
text_value.empty?
|
95
|
+
end
|
96
|
+
}
|
97
|
+
end
|
98
|
+
|
99
|
+
rule userSpec
|
100
|
+
user:(wordChar+) pwdOpt:pwdSpec? '@' {
|
101
|
+
def name
|
102
|
+
user.text_value.empty? ? nil : user.text_value
|
103
|
+
end
|
104
|
+
def password
|
105
|
+
pwdOpt.text_value.empty? ? nil : pwdOpt.pwd.text_value
|
106
|
+
end
|
107
|
+
}
|
108
|
+
end
|
109
|
+
|
110
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
grammar TestNumbers
|
2
|
+
|
3
|
+
include DataMetaCommonsRoot
|
4
|
+
rule testNumbers
|
5
|
+
singleDecDigitNoSign:decIntNoSign W mulDecDigitNoSign:decIntNoSign
|
6
|
+
W signableIntNoSign:decIntSignable W signableIntPlus:decIntSignable W signableIntMinus:decIntSignable
|
7
|
+
W singleDigSingleDigFrac:fixedDecimal
|
8
|
+
W singDigDoubleDigFrac:fixedDecimal
|
9
|
+
W doubleDigSingleDigFrac:fixedDecimal
|
10
|
+
W doubleDigDotFrac:fixedDecimal
|
11
|
+
W dotDigitsFrac:fixedDecimal
|
12
|
+
W singleDigSingleDigFracPlus:fixedDecimal
|
13
|
+
W singDigDoubleDigFracPlus:fixedDecimal
|
14
|
+
W doubleDigSingleDigFracPlus:fixedDecimal
|
15
|
+
W doubleDigDotFracPlus:fixedDecimal
|
16
|
+
W dotDigitsFracPlus:fixedDecimal
|
17
|
+
W singleDigSingleDigFracMinus:fixedDecimal
|
18
|
+
W singDigDoubleDigFracMinus:fixedDecimal
|
19
|
+
W doubleDigSingleDigFracMinus:fixedDecimal
|
20
|
+
W doubleDigDotFracMinus:fixedDecimal
|
21
|
+
W dotDigitsFracMinus:fixedDecimal
|
22
|
+
end
|
23
|
+
end
|
data/test/test_helper.rb
ADDED
@@ -0,0 +1,6 @@
|
|
1
|
+
## keep this underscore naming in the test subdir, it's easier to append files names to test
|
2
|
+
%w(stringio test/unit logger).each { |r| require r }
|
3
|
+
# this is expected to run from the project root, normally by the rake file
|
4
|
+
require './lib/dataMetaParse'
|
5
|
+
require './lib/dataMetaParse/uriDataMeta'
|
6
|
+
require './test/utils'
|
@@ -0,0 +1,52 @@
|
|
1
|
+
# keep this underscore naming in the test subdir, it's easier to append files names to test
|
2
|
+
require './test/utils'
|
3
|
+
|
4
|
+
# Unit test cases for the DataMetaParse
|
5
|
+
# See for instance:
|
6
|
+
# - test_full
|
7
|
+
# Assertions: http://ruby-doc.org/stdlib-1.9.3/libdoc/test/unit/rdoc/Test/Unit/Assertions.html
|
8
|
+
class TestNumberParse < Test::Unit::TestCase
|
9
|
+
|
10
|
+
include DataMetaParseTestUtil
|
11
|
+
L.info "Loaded Base Rules: #{BASE_RULS}"
|
12
|
+
# Loads the grammars, creates a parser
|
13
|
+
def setup
|
14
|
+
numbers = Treetop.load('./test/numbers')
|
15
|
+
L.info "Loaded numbers: #{numbers.inspect}"
|
16
|
+
|
17
|
+
@parser = TestNumbersParser.new
|
18
|
+
L.info "#{@parser.inspect}"
|
19
|
+
end
|
20
|
+
|
21
|
+
=begin rdoc
|
22
|
+
Numbers parsing test
|
23
|
+
=end
|
24
|
+
def test_numbers
|
25
|
+
ast = DataMetaParse.parse(@parser,
|
26
|
+
%q<1 123 4321 +4321 -4321 1.1 1.23 12.3 12. .12 +1.1 +1.23 +12.3 +12. +.12 -1.1 -1.23 -12.3 -12. -.12>)
|
27
|
+
raise 'Numbers parse unsuccessful' unless ast
|
28
|
+
raise ast if ast.is_a?(DataMetaParse::Err)
|
29
|
+
L.info "AST:\n#{ast.inspect}"
|
30
|
+
assert_equal(1, ast.singleDecDigitNoSign.text_value.to_i)
|
31
|
+
assert_equal(123, ast.mulDecDigitNoSign.text_value.to_i)
|
32
|
+
assert_equal(4321, ast.signableIntNoSign.text_value.to_i)
|
33
|
+
assert_equal(4321, ast.signableIntPlus.text_value.to_i)
|
34
|
+
assert_equal(-4321, ast.signableIntMinus.text_value.to_i)
|
35
|
+
assert_equal(1.1, ast.singleDigSingleDigFrac.text_value.to_f)
|
36
|
+
assert_equal(1.23, ast.singDigDoubleDigFrac.text_value.to_f)
|
37
|
+
assert_equal(12.3, ast.doubleDigSingleDigFrac.text_value.to_f)
|
38
|
+
assert_equal(12.0, ast.doubleDigDotFrac.text_value.to_f)
|
39
|
+
assert_equal(0.12, ast.dotDigitsFrac.text_value.to_f)
|
40
|
+
assert_equal(1.1, ast.singleDigSingleDigFracPlus.text_value.to_f)
|
41
|
+
assert_equal(1.23, ast.singDigDoubleDigFracPlus.text_value.to_f)
|
42
|
+
assert_equal(12.3, ast.doubleDigSingleDigFracPlus.text_value.to_f)
|
43
|
+
assert_equal(12.0, ast.doubleDigDotFracPlus.text_value.to_f)
|
44
|
+
assert_equal(0.12, ast.dotDigitsFracPlus.text_value.to_f)
|
45
|
+
assert_equal(-1.1, ast.singleDigSingleDigFracMinus.text_value.to_f)
|
46
|
+
assert_equal(-1.23, ast.singDigDoubleDigFracMinus.text_value.to_f)
|
47
|
+
assert_equal(-12.3, ast.doubleDigSingleDigFracMinus.text_value.to_f)
|
48
|
+
assert_equal(-12.0, ast.doubleDigDotFracMinus.text_value.to_f)
|
49
|
+
assert_equal(-0.12, ast.dotDigitsFracMinus.text_value.to_f)
|
50
|
+
end
|
51
|
+
|
52
|
+
end
|
@@ -0,0 +1,162 @@
|
|
1
|
+
# keep this underscore naming in the test subdir, it's easier to append files names to test
|
2
|
+
require './test/utils'
|
3
|
+
|
4
|
+
# Unit test cases for the DataMetaParse
|
5
|
+
# See for instance:
|
6
|
+
# - test_full
|
7
|
+
# Assertions: https://ruby-doc.org/stdlib-2.1.4/libdoc/test/unit/rdoc/Test/Unit/Assertions.html
|
8
|
+
#noinspection RubyStringKeysInHashInspection
|
9
|
+
class TestDataMetaParse < Test::Unit::TestCase

  include DataMetaParseTestUtil

  # Runs before each test: calls DataMetaParse::Uri.loadRulz
  # (presumably loads the URI grammar; see the library for details).
  def setup
    DataMetaParse::Uri.loadRulz
  end

  # Checks one URI specification and reports the result to the test log.
  #
  # uriSpec  - the URI text handed to DataMetaParse::Uri.parse
  # expected - the value the parse result must equal; nil when no match is expected
  def assertUri(uriSpec, expected)
    uri = DataMetaParse::Uri.parse(uriSpec)
    if uri
      L.info %Q<parsed "#{uriSpec}"; protocol: #{uri.proto}, user: #{uri.user}, pwd=#{uri.pwd}, host:#{uri.host}> +
               ", port=#{uri.port}, path:#{uri.path}, props: #{uri.props.inspect}\nre:#{uri}"
    else
      L.info %Q<parsed "#{uriSpec}", no match>
    end

    # assert_equal compares with ==; the message names the offending spec,
    # because this helper is called many times from a single test method.
    assert_equal(expected, uri, %Q<URI spec "#{uriSpec}">)
  end

  # Tests General URI grammar.
  # Arguments of DataMetaParse::Uri.new, in order: proto, user, pwd, host, port, path, props
  def test_GeneralUriGrammar
    assertUri(%q<http://www.domain.tld>, DataMetaParse::Uri.new('http', nil, nil, 'www.domain.tld', nil, nil, {}))
    assertUri(%q<http://www.domain.tld/>, DataMetaParse::Uri.new('http', nil, nil, 'www.domain.tld', nil, nil, {}))

    assertUri(%q<http://www.domain.tld:9090>,
              DataMetaParse::Uri.new('http', nil, nil, 'www.domain.tld', 9090, nil, {}))

    assertUri(%q<http://www.domain.tld:9090/>,
              DataMetaParse::Uri.new('http', nil, nil, 'www.domain.tld', 9090, nil, {}))

    assertUri(%q<http://joe_1@www.domain.tld>,
              DataMetaParse::Uri.new('http', 'joe_1', nil, 'www.domain.tld', nil, nil, {}))

    assertUri(%q<https://joe_1@www.domain.tld>,
              DataMetaParse::Uri.new('https', 'joe_1', nil, 'www.domain.tld', nil, nil, {}))

    assertUri(%q<http://joe_1:secret@www.domain.tld>,
              DataMetaParse::Uri.new('http', 'joe_1', 'secret', 'www.domain.tld', nil, nil, {}))

    # URL-encoded characters in the password are expected to come back decoded
    assertUri(%q<https://joe_1:secr%26et@www.domain.tld>,
              DataMetaParse::Uri.new('https', 'joe_1', 'secr&et', 'www.domain.tld', nil, nil, {}))

    assertUri(%q<ftp://www.domain.tld/path/dir>,
              DataMetaParse::Uri.new('ftp', nil, nil, 'www.domain.tld', nil, 'path/dir', {}))

    assertUri(%q<http://www.domain.tld/path/dir>,
              DataMetaParse::Uri.new('http', nil, nil, 'www.domain.tld', nil, 'path/dir', {}))

    assertUri(%q<http://www.domain.tld/path/dir?qa=aVal&qb=bVal>,
              DataMetaParse::Uri.new('http', nil, nil, 'www.domain.tld', nil, 'path/dir',
                                     {'qa' => 'aVal', 'qb' => 'bVal'}))

    # a query key without a value is expected to map to nil
    assertUri(%q<http://joe:secret@www.domain.tld/path/dir?qa=aVal&qb&qc=cVal>,
              DataMetaParse::Uri.new('http', 'joe', 'secret', 'www.domain.tld', nil, 'path/dir',
                                     {'qa' => 'aVal', 'qb' => nil, 'qc' => 'cVal'}))

    assertUri(%q<http://joe:secret@www.domain.tld/path/dir?qa=a%2FVal&qb=b%26Val&qc>,
              DataMetaParse::Uri.new('http', 'joe', 'secret', 'www.domain.tld', nil, 'path/dir',
                                     {'qa' => 'a/Val', 'qb' => 'b&Val', 'qc' => nil}))

    assertUri(%q<http://joe:secret@www.domain.tld:8443/path/dir?qa=a%2FVal&qb=b%26Val&qc>,
              DataMetaParse::Uri.new('http', 'joe', 'secret', 'www.domain.tld', 8443, 'path/dir',
                                     {'qa' => 'a/Val', 'qb' => 'b&Val', 'qc' => nil}))

    # file URIs: three slashes yield an absolute path, two slashes a relative one
    assertUri(%q<file:///dir/otherDir/file.ext>,
              DataMetaParse::Uri.new('file', nil, nil, nil, nil, '/dir/otherDir/file.ext', {}))

    assertUri(%q<file://dir/otherDir/file.ext>,
              DataMetaParse::Uri.new('file', nil, nil, nil, nil, 'dir/otherDir/file.ext', {}))

    assertUri(%q<hdfs://node-geo-ss.vip.acme.com/dir/otherDir/file.ext?cluster=hadoopCluster&format=seqFile&blkSize=128M>,
              DataMetaParse::Uri.new('hdfs', nil, nil, 'node-geo-ss.vip.acme.com', nil, 'dir/otherDir/file.ext',
                                     {'cluster' => 'hadoopCluster', 'format' => 'seqFile', 'blkSize' => '128M'}))

    assertUri(%q<hdfs://node-geo-ss.vip.acme.com:8020/dir/otherDir/file.ext?cluster=hadoopCluster&format=seqFile&blkSize=128M>,
              DataMetaParse::Uri.new('hdfs', nil, nil, 'node-geo-ss.vip.acme.com', 8020, 'dir/otherDir/file.ext',
                                     {'cluster' => 'hadoopCluster', 'format' => 'seqFile', 'blkSize' => '128M'}))

    # database URIs carrying a URL-encoded SQL statement in the "sql" property
    assertUri(%q<mysql://DM_USER:DataMeta_PWD@db-host:3306/database?sql=select%20*%20from%20entity%20where%20id%20%3D%201>,
              DataMetaParse::Uri.new('mysql', 'DM_USER', 'DataMeta_PWD', 'db-host', 3306, 'database',
                                     {'sql' => 'select * from entity where id = 1'}))

    assertUri(%q<mysql://DM_USER:DataMeta_PWD@db-host/database?sql=select%20*%20from%20entity%20where%20id%20%3D%201>,
              DataMetaParse::Uri.new('mysql', 'DM_USER', 'DataMeta_PWD', 'db-host', nil, 'database',
                                     {'sql' => 'select * from entity where id = 1'}))

    assertUri(%q<mysql://DM_USER:DataMeta_PWD@db-host/database?sql=select%20*%20from%20entity%20where%20id%20%3D%201%20and%20c2%20%3D%20%27abc%27>,
              DataMetaParse::Uri.new('mysql', 'DM_USER', 'DataMeta_PWD', 'db-host', nil, 'database',
                                     {'sql' => %q<select * from entity where id = 1 and c2 = 'abc'>}))

    assertUri(%q<mysql://DM_USER:DataMeta_PWD@db-host/database?sql=select%20*%20from%20entity%20where%20id%20!%3D%201>,
              DataMetaParse::Uri.new('mysql', 'DM_USER', 'DataMeta_PWD', 'db-host', nil, 'database',
                                     {'sql' => %q|select * from entity where id != 1|}))

    assertUri(%q<mysql://DM_USER:DataMeta_PWD@db-host/database?sql=select%20*%20from%20entity%20where%20id%20%3C%3E%201>,
              DataMetaParse::Uri.new('mysql', 'DM_USER', 'DataMeta_PWD', 'db-host', nil, 'database',
                                     {'sql' => %q|select * from entity where id <> 1|}))

    assertUri(%q<mysql://DM_USER:DataMeta_PWD@db-host/database?sql=select%20*%20from%20entity%20where%20id%20%3E%3D%201>,
              DataMetaParse::Uri.new('mysql', 'DM_USER', 'DataMeta_PWD', 'db-host', nil, 'database',
                                     {'sql' => %q|select * from entity where id >= 1|}))

    assertUri(%q<mysql://DM_USER:DataMeta_PWD@db-host/database?sql=select%20*%20from%20database.entity%20where%20id%20%3E%3D%201>,
              DataMetaParse::Uri.new('mysql', 'DM_USER', 'DataMeta_PWD', 'db-host', nil, 'database',
                                     {'sql' => %q|select * from database.entity where id >= 1|}))

    assertUri(%q<mysql://DM_USER:DataMeta_PWD@db-host/database?sql=select%20*%20from%20entity%20where%20id%20in%20(1%2C2%2C3)>,
              DataMetaParse::Uri.new('mysql', 'DM_USER', 'DataMeta_PWD', 'db-host', nil, 'database',
                                     {'sql' => %q|select * from entity where id in (1,2,3)|}))

    assertUri(%q<oracle://DM_USER:DataMeta_PWD@db-host:3306/database?sql=select%20*%20from%20entity%20where%20id%20%3D%201>,
              DataMetaParse::Uri.new('oracle', 'DM_USER', 'DataMeta_PWD', 'db-host', 3306, 'database',
                                     {'sql' => 'select * from entity where id = 1'}))

    assertUri(%q<mysql://DM_USER:DataMeta_PWD@db-host/database?sql=select%20*%20from%20database.entity%20where%20id%20like%20%27%25a%25%27>,
              DataMetaParse::Uri.new('mysql', 'DM_USER', 'DataMeta_PWD', 'db-host', nil, 'database',
                                     {'sql' => %q|select * from database.entity where id like '%a%'|}))
  end

  # Checks bad URIs: every constructor call listed here must raise ArgumentError,
  # and an hdfs specification without a namenode must not parse at all.
  def test_badUris
    # Each entry is a full argument list: proto, user, pwd, host, port, path, props
    rejected = [
      # 'file' protocol combined with a user, a password, a host or a port
      ['file', 'blah', nil, nil, nil, 'dir/otherDir/file.ext', {}],
      ['file', nil, 'blah', nil, nil, 'dir/otherDir/file.ext', {}],
      ['file', nil, nil, 'blah', nil, 'dir/otherDir/file.ext', {}],
      ['file', nil, nil, nil, 8080, 'dir/otherDir/file.ext', {}],
      # password but no user, no good:
      ['http', nil, 'secret', 'www.domain.tld', nil, 'path/dir', {'qa' => 'aVal', 'qb' => 'bVal'}]
    ]

    rejected.each do |ctorArgs|
      assert_raise(ArgumentError, "Uri.new(#{ctorArgs.inspect}) must be rejected") {
        DataMetaParse::Uri.new(*ctorArgs)
      }
    end

    # Hadoop Good Practice: disallow hdfs specifications without namenode
    assert_nil(DataMetaParse::Uri.parse('hdfs:///dir/otherDir/file.ext'))
  end
end
|
data/test/utils.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
|
2
|
+
=begin rdoc
|
3
|
+
Utilities for testing
|
4
|
+
=end
|
5
|
+
module DataMetaParseTestUtil
  # Logger shared by the tests; writes to parseTests.log, a path relative to
  # the working directory of the test run.
  # NOTE(review): per the Ruby Logger documentation a shift_age of 0 disables
  # rotation, so the 10000 size limit likely has no effect - confirm.
  L = Logger.new('parseTests.log', 0, 10000).tap do |log|
    log.level = Logger::DEBUG
    log.datetime_format = '%Y-%m-%d %H:%M:%S'
  end

  # Base grammar, loaded from a path relative to the working directory;
  # same as: DataMetaParse.loadBaseRulz
  BASE_RULS = Treetop.load('./lib/dataMetaParse/basic')
end
|
metadata
ADDED
@@ -0,0 +1,83 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: dataMetaParse
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Michael Bergens
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2017-01-15 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: treetop
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.6'
|
20
|
+
- - ">="
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: 1.6.8
|
23
|
+
type: :runtime
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: !ruby/object:Gem::Requirement
|
26
|
+
requirements:
|
27
|
+
- - "~>"
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '1.6'
|
30
|
+
- - ">="
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: 1.6.8
|
33
|
+
description: DataMeta Parser commons; common rules and some reusable grammars
|
34
|
+
email: michael.bergens@gmail.com
|
35
|
+
executables: []
|
36
|
+
extensions: []
|
37
|
+
extra_rdoc_files: []
|
38
|
+
files:
|
39
|
+
- ".yardopts"
|
40
|
+
- History.md
|
41
|
+
- PostInstall.txt
|
42
|
+
- README.md
|
43
|
+
- Rakefile
|
44
|
+
- lib/dataMetaParse.rb
|
45
|
+
- lib/dataMetaParse/basic.treetop
|
46
|
+
- lib/dataMetaParse/uri.treetop
|
47
|
+
- lib/dataMetaParse/uriDataMeta.rb
|
48
|
+
- lib/dataMetaParse/urlDataMeta.treetop
|
49
|
+
- test/numbers.treetop
|
50
|
+
- test/test_helper.rb
|
51
|
+
- test/test_numbers.rb
|
52
|
+
- test/test_uriParser.rb
|
53
|
+
- test/utils.rb
|
54
|
+
homepage: https://github.com/eBayDataMeta
|
55
|
+
licenses:
|
56
|
+
- Apache-2.0
|
57
|
+
metadata: {}
|
58
|
+
post_install_message:
|
59
|
+
rdoc_options: []
|
60
|
+
require_paths:
|
61
|
+
- lib
|
62
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
63
|
+
requirements:
|
64
|
+
- - ">="
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: 2.0.0
|
67
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
68
|
+
requirements:
|
69
|
+
- - ">="
|
70
|
+
- !ruby/object:Gem::Version
|
71
|
+
version: '0'
|
72
|
+
requirements:
|
73
|
+
- No specific requirements
|
74
|
+
rubyforge_project:
|
75
|
+
rubygems_version: 2.5.1
|
76
|
+
signing_key:
|
77
|
+
specification_version: 4
|
78
|
+
summary: DataMeta Parser commons
|
79
|
+
test_files:
|
80
|
+
- test/test_numbers.rb
|
81
|
+
- test/test_uriParser.rb
|
82
|
+
- test/numbers.treetop
|
83
|
+
- test/test_helper.rb
|