dataMetaParse 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.yardopts +1 -0
- data/History.md +5 -0
- data/PostInstall.txt +2 -0
- data/README.md +53 -0
- data/Rakefile +13 -0
- data/lib/dataMetaParse.rb +58 -0
- data/lib/dataMetaParse/basic.treetop +236 -0
- data/lib/dataMetaParse/uri.treetop +229 -0
- data/lib/dataMetaParse/uriDataMeta.rb +139 -0
- data/lib/dataMetaParse/urlDataMeta.treetop +110 -0
- data/test/numbers.treetop +23 -0
- data/test/test_helper.rb +6 -0
- data/test/test_numbers.rb +52 -0
- data/test/test_uriParser.rb +162 -0
- data/test/utils.rb +12 -0
- metadata +83 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 7a0e72fa6c8ea304157a65156fb66f908b74bef4
|
4
|
+
data.tar.gz: 0cb917be27d2ca056bc345e277e3377dcbdd1334
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 4aa00db56ca8992e43119ac430759bc0429390b8c3dc6f1f4ecfc0000eebd87e711ac5ea4852472f10d15e50868ec0a9d97c320aa6f8c50325967e0f4b4dfeff
|
7
|
+
data.tar.gz: d94b08fa48f84f458d8bc18c5e5ccc9feebba35c6e62caaf58070c9188157fe204285b906271d5ea6b1e9db874c2ccdc3e04b1948db0c5075bc6d833332d369a
|
data/.yardopts
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--title "DataMeta Parsing Utils" -r README.md --charset UTF-8 lib/**/*rb - README.md
|
data/History.md
ADDED
data/PostInstall.txt
ADDED
data/README.md
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
# dataMetaParse
|
2
|
+
|
3
|
+
DataMeta Parser commons: common rules and some reusable grammars
|
4
|
+
|
5
|
+
References to this gem's:
|
6
|
+
|
7
|
+
* [Source](https://github.com/eBayDataMeta/DataMeta-gems)
|
8
|
+
|
9
|
+
## DESCRIPTION:
|
10
|
+
|
11
|
+
See the [DataMeta Project](https://github.com/eBayDataMeta/DataMeta)
|
12
|
+
|
13
|
+
## FEATURES/PROBLEMS:
|
14
|
+
|
15
|
+
* This gem uses [treetop](http://treetop.rubyforge.org) for grammar processing which only works with
|
16
|
+
[PEGs](http://en.wikipedia.org/wiki/Parsing_expression_grammar), same as [Antlr](http://www.antlr.org) and many other
|
17
|
+
popular grammar processors. Hence, be careful with features that
|
18
|
+
[PEGs](http://en.wikipedia.org/wiki/Parsing_expression_grammar) do not support,
|
19
|
+
like [left recursion](http://en.wikipedia.org/wiki/Left_recursion).
|
20
|
+
|
21
|
+
### DataMeta URI parsing
|
22
|
+
|
23
|
+
This gem provides a convenient class for URI parsing with DataMeta specifics.
|
24
|
+
|
25
|
+
The URI format is [typical](http://support.microsoft.com/kb/135975 "URL Format - MS Knowledge Base"):
|
26
|
+
|
27
|
+
protocol://user:password@server:port/path?query
|
28
|
+
|
29
|
+
Out of which,
|
30
|
+
|
31
|
+
* `protocol`: required, corresponds with DataMeta "platform", can be:
|
32
|
+
* `oracle` - for Oracle connections
|
33
|
+
* `mysql` - for MySQL connections
|
34
|
+
* `user`: optional, the user name for authentication
|
35
|
+
* `password`: password for the user, can be only used in conjunction with the `user`. Depending on a protocol,
|
36
|
+
can be either required or optional.
|
37
|
+
* `server`: required, host name or IP address
|
38
|
+
* `port`: optional, port number to connect to
|
39
|
+
* `path`: optional, protocol specific, may refer either to a full path on the server's filesystem or a name of the database
|
40
|
+
* `?query`: optional, regular format for the URL query, in `key=value` format separated by <tt>&</tt>, any special
|
41
|
+
characters encoded in the <tt>%xx</tt> format.
|
42
|
+
|
43
|
+
## SYNOPSIS:
|
44
|
+
|
45
|
+
* No command line runnables in this gem, it is a library only.
|
46
|
+
|
47
|
+
## INSTALL:
|
48
|
+
|
49
|
+
gem install dataMetaParse
|
50
|
+
|
51
|
+
## LICENSE:
|
52
|
+
|
53
|
+
[Apache v 2.0](https://github.com/eBayDataMeta/DataMeta/blob/master/LICENSE.md)
|
data/Rakefile
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
# Rake build script: wires up the unit tests and the YARD documentation task.
require 'yard'
require 'rdoc/task'
require 'rake/testtask'
require 'fileutils'
require './lib/dataMetaParse'

# Unit tests; the test directory is added to the load path.
Rake::TestTask.new { |t| t.libs << 'test' }

# The default task regenerates the docs.
desc 'Regen RDocs'
task default: :docs

# YARD documentation, listing the undocumented objects in the stats.
YARD::Rake::YardocTask.new('docs') do |r|
  r.stats_options = ['--list-undoc']
end
|
13
|
+
|
@@ -0,0 +1,58 @@
|
|
1
|
+
$:.unshift(File.dirname(__FILE__)) unless $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
|
2
|
+
|
3
|
+
require 'treetop'
|
4
|
+
# Grammar parsing commons for the dataMeta Project.
#
# This gem is a library only, it has no command line runnables; see the README.md file for an overview.
module DataMetaParse
  # Current version
  VERSION = '1.0.0'

  # Parsing error, RuntimeError augmented with report feature: the message shows where and why the parse failed.
  class Err < RuntimeError
    attr_reader :source, :parser

    # Constructor, constructs also the error message passed to the super.
    #
    # The message consists of four lines: the index reported by the parser, the reason (the "Expected ..." part
    # of the parser's +failure_reason+ or +REASONLESS+ if there is no such part), the source line on which the
    # parse failed and a <tt>~~~^</tt> pointer to the failure column.
    #
    # @param [String] source the text that has been parsed to get this error
    # @param [Object] parser Treetop compiled parser whichever class it is. It may be
    #   +Treetop::Runtime::CompiledParser+. It must respond to +failure_reason+, +index+, +failure_line+
    #   and +failure_column+.
    def initialize(source, parser)
      @source, @parser = source, parser
      reason = parser.failure_reason.to_s[/^(Expected .+) after/m, 1] || 'REASONLESS'
      # Strip the line terminator: otherwise a blank line gets between the source line and the pointer under it.
      failedLine = source.lines.to_a[parser.failure_line - 1].to_s.chomp
      pointer = "#{'~' * (parser.failure_column - 1)}^"
      # replace newlines in the reason with <EOL> to make them stand out
      super("ERROR at index #{parser.index}\n#{reason.gsub("\n", '<EOL>')}:\n#{failedLine}\n#{pointer}\n")
    end
  end

  # Loads the base rules from +dataMetaParse/basic.treetop+
  def loadBaseRulz
    Treetop.load("#{File.dirname(__FILE__)}/dataMetaParse/basic")
  end

  # Parse with error handling, convenience shortcut to the content of this method.
  #
  # Note that the {Err} is returned, not raised.
  #
  # @param [Object] parser Treetop compiled parser whichever class it is. It may be +Treetop::Runtime::CompiledParser+
  # @param [String] source the data to parse with the given parser
  # @return [Object] either the AST, likely as +Treetop::Runtime::SyntaxNode+ if the parsing was successful, or {Err}
  #   if it was not and the parser reports a +failure_reason+, or +nil+ if there is no match and no failure reason.
  #   It's not very consistent of when you get an Err or when you get a +nil+, it's not exact science.
  #   One way to get a +nil+ is to cause mismatch in the very first token.
  def parse(parser, source)
    parser.parse(source) || (parser.failure_reason ? Err.new(source, parser) : nil)
  end

  module_function :loadBaseRulz, :parse
end
|
@@ -0,0 +1,236 @@
|
|
1
|
+
grammar DataMetaCommonsRoot
|
2
|
+
|
3
|
+
# Some staple rules
|
4
|
+
# w in the name means “whitespace”, e means End of Line, and capitalization means that the
|
5
|
+
# capitalized part is required (contrary to being optional).
|
6
|
+
|
7
|
+
|
8
|
+
# Required whitespace
|
9
|
+
rule W
|
10
|
+
[\s]+
|
11
|
+
# [ \t]+
|
12
|
+
end
|
13
|
+
|
14
|
+
rule notBlank
|
15
|
+
(!W .)
|
16
|
+
end
|
17
|
+
|
18
|
+
rule notBlanks
|
19
|
+
notBlank+
|
20
|
+
end
|
21
|
+
|
22
|
+
rule E
|
23
|
+
"\n"
|
24
|
+
end
|
25
|
+
|
26
|
+
# to the End Of Line inclusively
|
27
|
+
rule toE
|
28
|
+
tillE? E
|
29
|
+
end
|
30
|
+
|
31
|
+
# to the End Of Line exclusively
|
32
|
+
rule tillE
|
33
|
+
notE+
|
34
|
+
end
|
35
|
+
|
36
|
+
rule notE
|
37
|
+
(!E .)
|
38
|
+
end
|
39
|
+
|
40
|
+
# Optional version of wE: whitespace with at least one EOL, or nothing at all
|
41
|
+
rule we
|
42
|
+
wE?
|
43
|
+
end
|
44
|
+
|
45
|
+
# Optional whitespace with required EOL
|
46
|
+
rule wE
|
47
|
+
( ( W "\n" / w meshLineComment / w "\n" ) w meshLineComment? )+
|
48
|
+
end
|
49
|
+
|
50
|
+
# Optional whitespace
|
51
|
+
rule w
|
52
|
+
W?
|
53
|
+
end
|
54
|
+
|
55
|
+
# Comment used in scripting languages like Bash, Ruby etc.
|
56
|
+
rule meshLineComment
|
57
|
+
'#' (!"\n" .)* "\n"
|
58
|
+
end
|
59
|
+
|
60
|
+
# /* */ comment used in C, Java etc, multiline
|
61
|
+
rule slashStarComment
|
62
|
+
'/*'
|
63
|
+
(
|
64
|
+
!'*/'
|
65
|
+
(. / "\n")
|
66
|
+
)*
|
67
|
+
'*/'
|
68
|
+
end
|
69
|
+
|
70
|
+
# Uppercase A to Z
|
71
|
+
rule AZ
|
72
|
+
[A-Z]+
|
73
|
+
end
|
74
|
+
|
75
|
+
# Lowercase A to Z
|
76
|
+
rule az
|
77
|
+
[a-z]+
|
78
|
+
end
|
79
|
+
|
80
|
+
# Alphabetical
|
81
|
+
rule alpha
|
82
|
+
[a-zA-Z]+
|
83
|
+
end
|
84
|
+
|
85
|
+
# Decimal digit
|
86
|
+
rule digit
|
87
|
+
[0-9]
|
88
|
+
end
|
89
|
+
|
90
|
+
rule dot
|
91
|
+
'.'
|
92
|
+
end
|
93
|
+
|
94
|
+
rule plus
|
95
|
+
'+'
|
96
|
+
end
|
97
|
+
|
98
|
+
rule minus
|
99
|
+
'-'
|
100
|
+
end
|
101
|
+
|
102
|
+
rule sign
|
103
|
+
plus / minus
|
104
|
+
end
|
105
|
+
|
106
|
+
rule decIntNoSign
|
107
|
+
digit+
|
108
|
+
end
|
109
|
+
|
110
|
+
rule decIntSignable
|
111
|
+
sign? decIntNoSign
|
112
|
+
end
|
113
|
+
|
114
|
+
rule decFraction
|
115
|
+
dot decIntNoSign
|
116
|
+
end
|
117
|
+
|
118
|
+
rule signDotDecFrac
|
119
|
+
sign? dot decIntNoSign
|
120
|
+
end
|
121
|
+
|
122
|
+
rule decIntDotFrac
|
123
|
+
decIntSignable dot
|
124
|
+
end
|
125
|
+
|
126
|
+
rule fullDecFrac
|
127
|
+
decIntSignable decFraction
|
128
|
+
end
|
129
|
+
|
130
|
+
rule fixedDecimal
|
131
|
+
fullDecFrac /signDotDecFrac / decIntDotFrac
|
132
|
+
end
|
133
|
+
|
134
|
+
# Hexadecimal digit
|
135
|
+
rule hexDigit
|
136
|
+
[0-9A-Fa-f]
|
137
|
+
end
|
138
|
+
|
139
|
+
# Alphanumeric
|
140
|
+
rule alphaNum
|
141
|
+
[0-9A-Za-z]
|
142
|
+
end
|
143
|
+
|
144
|
+
# dataMeta "Word" character, can be a part of an identifier name.
|
145
|
+
rule wordChar
|
146
|
+
[0-9A-Za-z_]
|
147
|
+
end
|
148
|
+
|
149
|
+
rule dmWord
|
150
|
+
wordChar+
|
151
|
+
end
|
152
|
+
|
153
|
+
# Class name: starts with uppercase, then any number of the word characters.
# (Was "wordChar?", which allowed one trailing word character at most, so e.g. "Foo" did not match.)
rule className
    AZ wordChar*
end
|
157
|
+
|
158
|
+
# Variable name: starts with an underscore or a lowercase letter, then any number of the word characters.
# (Was "wordChar?", which limited variable names to two characters.)
rule varName
    [_a-z] wordChar* # allows single underscore as a var name.
end
|
162
|
+
|
163
|
+
# C-style comment to reuse everywhere
|
164
|
+
rule c_comment
|
165
|
+
'/*'
|
166
|
+
(
|
167
|
+
!'*/'
|
168
|
+
(. / "\n")
|
169
|
+
)*
|
170
|
+
'*/'
|
171
|
+
end
|
172
|
+
|
173
|
+
# C-Style whitespace
|
174
|
+
rule c_whitespace
|
175
|
+
c_comment / W
|
176
|
+
end
|
177
|
+
|
178
|
+
# C-Style End of Line comment
|
179
|
+
rule cEolComment
|
180
|
+
'//' (!"\n" .)* "\n"
|
181
|
+
end
|
182
|
+
|
183
|
+
rule string
|
184
|
+
'"' letters:( !'"' stringLetter )* '"' {
|
185
|
+
def fetch
|
186
|
+
letters.elements.map { |el| el.elements.last.fetch }.join
|
187
|
+
end
|
188
|
+
}
|
189
|
+
end
|
190
|
+
|
191
|
+
# One character of a string literal's body: either a backslash escape (\" \n \t \r \\) or any other single
# character taken verbatim. The "fetch" method returns the character the element stands for.
rule stringLetter
    # The escaped backslash has to be matched here: otherwise the second backslash of "\\" followed by
    # the closing quote is taken for an escaped quote and the literal never terminates.
    '\\' char:(["ntr] / '\\') {
        def fetch
            case char.text_value
                when '"'; '"'
                when 'n'; "\n"
                when 'r'; 13.chr
                when 't'; 9.chr
                when '\\'; "\\"
                else
                    # should be unreachable: the grammar admits only the escapes listed above
                    raise ArgumentError, "Invalid string escape '#{char.text_value}'"
            end
        end
    }
    /
    . {
        def fetch
            text_value
        end
    }
end
|
212
|
+
|
213
|
+
rule sn
|
214
|
+
sN?
|
215
|
+
end
|
216
|
+
|
217
|
+
# borrowed from: http://whitequark.org/blog/2011/09/08/treetop-typical-errors
|
218
|
+
# C End of line
|
219
|
+
rule cEol
|
220
|
+
( ( S "\n" / s cEolComment / s "\n" ) s cEolComment? )+
|
221
|
+
end
|
222
|
+
|
223
|
+
# Scripting EOL, with mesh comment
|
224
|
+
rule sEol
|
225
|
+
( ( S "\n" / s meshLineComment / s "\n" ) s meshLineComment? )+
|
226
|
+
end
|
227
|
+
|
228
|
+
rule s
|
229
|
+
S?
|
230
|
+
end
|
231
|
+
|
232
|
+
rule S
|
233
|
+
[ \t]+
|
234
|
+
end
|
235
|
+
|
236
|
+
end
|
@@ -0,0 +1,229 @@
|
|
1
|
+
# Borrowed from: https://github.com/juretta/uri-templates/blob/master/grammar/uri_template.treetop
|
2
|
+
|
3
|
+
grammar UriTemplate
|
4
|
+
|
5
|
+
include DataMetaCommonsRoot
|
6
|
+
|
7
|
+
rule uri_template
|
8
|
+
uri_element more_elements:(uri_element)* {
|
9
|
+
def value(env={})
|
10
|
+
uri_element.value(env) << more_elements.elements.map{|el| el.value(env)}.join
|
11
|
+
end
|
12
|
+
}
|
13
|
+
end
|
14
|
+
|
15
|
+
rule uri_element
|
16
|
+
expansion / uri_part
|
17
|
+
end
|
18
|
+
|
19
|
+
rule expansion
|
20
|
+
'{'
|
21
|
+
c:(
|
22
|
+
var
|
23
|
+
/
|
24
|
+
operator
|
25
|
+
)
|
26
|
+
'}'
|
27
|
+
{
|
28
|
+
def value(env = {})
|
29
|
+
c.value(env)
|
30
|
+
end
|
31
|
+
}
|
32
|
+
end
|
33
|
+
|
34
|
+
rule uri_part
|
35
|
+
(unreserved / reserved / pct_encoded) {
|
36
|
+
def value(env = {})
|
37
|
+
text_value
|
38
|
+
end
|
39
|
+
}
|
40
|
+
end
|
41
|
+
|
42
|
+
rule arg
|
43
|
+
(reserved / unreserved / pct_encoded)*
|
44
|
+
end
|
45
|
+
|
46
|
+
rule op
|
47
|
+
(
|
48
|
+
'opt' {
|
49
|
+
# If each variable is undefined or an empty list then substitute the
|
50
|
+
# empty string, otherwise substitute the value of 'arg'.
|
51
|
+
def exec
|
52
|
+
lambda do |env, arg, vars|
|
53
|
+
ret = ''
|
54
|
+
vars.split(',').each do |var|
|
55
|
+
if env[var] && (env[var].respond_to?(:length) ? env[var].length > 0 : true)
|
56
|
+
ret = "#{arg}"
|
57
|
+
break
|
58
|
+
end
|
59
|
+
end
|
60
|
+
ret
|
61
|
+
end
|
62
|
+
end
|
63
|
+
}
|
64
|
+
/
|
65
|
+
'neg' {
|
66
|
+
# If all of the variables are un-defined or empty then substitute the
|
67
|
+
# value of arg, otherwise substitute the empty string.
|
68
|
+
def exec
|
69
|
+
lambda do |env, arg, vars|
|
70
|
+
ret = "#{arg}"
|
71
|
+
vars.split(',').each do |var|
|
72
|
+
if !env[var].to_s.blank?
|
73
|
+
ret = ""
|
74
|
+
break
|
75
|
+
end
|
76
|
+
end
|
77
|
+
ret
|
78
|
+
end
|
79
|
+
end
|
80
|
+
}
|
81
|
+
/
|
82
|
+
'prefix' {
|
83
|
+
# The prefix operator MUST only have one variable in its expansion. If
|
84
|
+
# the variable is defined and non-empty then substitute the value of
|
85
|
+
# arg followed by the value of the variable, otherwise substitute the
|
86
|
+
# empty string.
|
87
|
+
def exec
|
88
|
+
lambda do |env, prefix, vars|
|
89
|
+
v = env[vars]
|
90
|
+
if vars =~ /([^=]+)=([^=]+)/
|
91
|
+
var, default = $1.dup, $2.dup
|
92
|
+
v = env[var]
|
93
|
+
v = default if v.to_s.blank?
|
94
|
+
end
|
95
|
+
!v.blank? ? "#{prefix}#{UriTemplate::Encoder.encode(v)}" : ""
|
96
|
+
end
|
97
|
+
end
|
98
|
+
}
|
99
|
+
/
|
100
|
+
'suffix' {
|
101
|
+
# The suffix operator MUST only have one variable in its expansion. If
|
102
|
+
# the variable is defined and non-empty then substitute the value of
|
103
|
+
# the variable followed by the value of arg, otherwise substitute the
|
104
|
+
# empty string.
|
105
|
+
def exec
|
106
|
+
lambda do |env, append, vars|
|
107
|
+
v = env[vars]
|
108
|
+
if vars =~ /([^=]+)=([^=]+)/
|
109
|
+
var, default = $1.dup, $2.dup
|
110
|
+
v = env[var]
|
111
|
+
v = default if v.to_s.blank?
|
112
|
+
end
|
113
|
+
if v
|
114
|
+
val = UriTemplate::Encoder.encode(v)
|
115
|
+
!val.blank? ? "#{val}#{append}" : ""
|
116
|
+
else
|
117
|
+
''
|
118
|
+
end
|
119
|
+
end
|
120
|
+
end
|
121
|
+
}
|
122
|
+
/
|
123
|
+
'join' {
|
124
|
+
# For each variable that is defined and non-empty create a keyvalue
|
125
|
+
# string that is the concatenation of the variable name, "=", and the
|
126
|
+
# variable value. Concatenate more than one keyvalue string with
|
127
|
+
# intervening values of arg to create the substitution value.
|
128
|
+
def exec
|
129
|
+
lambda do |env, joinop, vars|
|
130
|
+
vars.split(',').map do |var|
|
131
|
+
v = env[var]
|
132
|
+
if var =~ /([^=]+)=([^=]+)/
|
133
|
+
var, default = $1.dup, $2.dup
|
134
|
+
v = env[var]
|
135
|
+
v = default if v.to_s.blank?
|
136
|
+
end
|
137
|
+
"#{var}=#{UriTemplate::Encoder.encode(v)}" if v
|
138
|
+
end.compact.join(joinop)
|
139
|
+
end
|
140
|
+
end
|
141
|
+
}
|
142
|
+
/
|
143
|
+
'list' {
|
144
|
+
# The listjoin operator MUST have only one variable in its expansion
|
145
|
+
# and that variable must be a list. More than one variable is an
|
146
|
+
# error. If the list is non-empty then substitute the concatenation of
|
147
|
+
# all the list members with intervening values of arg. If the list is
|
148
|
+
# empty or the variable is undefined them substitute the empty string.
|
149
|
+
def exec
|
150
|
+
lambda do |env, joinop, vars|
|
151
|
+
return "" unless env[vars].respond_to? :each
|
152
|
+
env[vars].map do |v|
|
153
|
+
"#{UriTemplate::Encoder.encode(v)}" if v
|
154
|
+
end.compact.join(joinop)
|
155
|
+
end
|
156
|
+
end
|
157
|
+
}
|
158
|
+
)
|
159
|
+
end
|
160
|
+
|
161
|
+
rule vars
|
162
|
+
var ("," var)*
|
163
|
+
end
|
164
|
+
|
165
|
+
rule vardefault
|
166
|
+
(unreserved / pct_encoded)*
|
167
|
+
end
|
168
|
+
|
169
|
+
rule var
|
170
|
+
varname defaults:('=' vardefault)* {
|
171
|
+
def value(env={} )
|
172
|
+
return UriTemplate::Encoder.encode(env[name]) unless env[name].nil?
|
173
|
+
defaults.text_value.gsub(/=/, '')
|
174
|
+
end
|
175
|
+
|
176
|
+
def name
|
177
|
+
varname.text_value
|
178
|
+
end
|
179
|
+
}
|
180
|
+
end
|
181
|
+
|
182
|
+
rule operator
|
183
|
+
"-" op "|" arg "|" vars {
|
184
|
+
def value(env={})
|
185
|
+
op.exec.call(env, arg.text_value, vars.text_value) # if op.respond_to?(:exec)
|
186
|
+
end
|
187
|
+
}
|
188
|
+
end
|
189
|
+
|
190
|
+
rule varname
|
191
|
+
[a-zA-Z0-9] [a-zA-Z0-9_.-]*
|
192
|
+
end
|
193
|
+
|
194
|
+
rule alpha
|
195
|
+
[A-Za-z_]
|
196
|
+
end
|
197
|
+
|
198
|
+
rule alphanumeric
|
199
|
+
alpha / [0-9]
|
200
|
+
end
|
201
|
+
|
202
|
+
# see http://www.ietf.org/rfc/rfc3986.txt
|
203
|
+
rule unreserved
|
204
|
+
alphanumeric / "-" / "." / "_" / "~"
|
205
|
+
end
|
206
|
+
|
207
|
+
# see http://www.ietf.org/rfc/rfc3986.txt
|
208
|
+
rule pct_encoded
|
209
|
+
'%' hexdig hexdig
|
210
|
+
end
|
211
|
+
|
212
|
+
rule hexdig
|
213
|
+
[a-fA-F0-9]
|
214
|
+
end
|
215
|
+
|
216
|
+
# see http://www.ietf.org/rfc/rfc3986.txt
|
217
|
+
rule reserved
|
218
|
+
gen_delims / sub_delims
|
219
|
+
end
|
220
|
+
|
221
|
+
rule gen_delims
|
222
|
+
":" / "/" / "?" / "#" / "[" / "]" / "@"
|
223
|
+
end
|
224
|
+
|
225
|
+
rule sub_delims
|
226
|
+
"!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
|
227
|
+
end
|
228
|
+
|
229
|
+
end
|
@@ -0,0 +1,139 @@
|
|
1
|
+
$:.unshift(File.dirname(__FILE__)) unless $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
|
2
|
+
|
3
|
+
require 'uri'
|
4
|
+
require 'treetop'
|
5
|
+
|
6
|
+
module DataMetaParse
  # DataMeta URI with all the parts.
  #
  # The user story:
  #
  # * DataMeta URIs are used in DataMeta Scripts to specify all aspects of a data set identity and location.
  # * For physical access, a URI may be disassembled using this grammar and parser, the parts so obtained may be
  #   used to access concrete physical resources.
  #
  # @!attribute [r] proto
  #   @return [String] the protocol part, such as +http+, +ftp+, +mysql+ etc
  #
  # @!attribute [r] user
  #   @return [String] the user id part of the URI, can be +nil+ and for some URIs may be in properties
  #
  # @!attribute [r] pwd
  #   @return [String] the password part of the URI, can be +nil+ and for some URIs may be in properties
  #
  # @!attribute [r] host
  #   @return [String] the host part of the URI
  #
  # @!attribute [r] port
  #   @return [Integer] the port number specified in the URI, can be +nil+
  #
  # @!attribute [r] path
  #   @return [String] for the +file+ protocol, path as specified, full or relative. For any other URI, the part
  #     between the closing '/' after the +host:port+ part and the query part starting with '?'.
  #     This means, for all other protocols except +file+, the path part will never have an initial slash.
  #
  # @!attribute [r] props
  #   @return [Hash] hash of properties keyed by the property name and pointing to a value if any
  class Uri
    # Names of the readers that together make up the identity of an instance.
    PARTS = [:proto, :user, :pwd, :host, :port, :path, :props].freeze

    attr_reader :proto, :user, :pwd, :host, :port, :path, :props

    # Creates an instance of the object.
    #
    # @param [String] proto see the property {#proto}
    # @param [String] user see the property {#user}
    # @param [String] pwd see the property {#pwd}
    # @param [String] host see the property {#host}
    # @param [Integer] port see the property {#port}
    # @param [String] path see the property {#path}
    # @param [Hash] props see the property {#props}
    # @raise [ArgumentError] if a password is given without a user, or if anything but the path is given for
    #   the +file+ protocol
    def initialize(proto, user, pwd, host, port, path, props)
      raise ArgumentError, 'Password specified but user not' if !user && pwd
      raise ArgumentError, 'For file protocol, only path can be specified' if proto == 'file' && (
          user || pwd || host || port || !props.empty?)

      @proto, @user, @pwd, @host, @port, @path, @props = proto, user, pwd, host, port, path, props
    end

    # Equality to the other: all the parts must be equal. Anything that does not expose all the parts,
    # +nil+ included, is not equal to an instance of this class.
    def ==(other)
      # guard first: a failed parse yields nil, and comparing with it must not blow up
      return false unless PARTS.all? { |part| other.respond_to?(part) }

      @proto == other.proto && @user == other.user && @pwd == other.pwd && @host == other.host &&
          @port == other.port && @path == other.path && @props.eql?(other.props)
    end

    # Same as the {#==}
    def eql?(other); self == other end

    # Hash code consistent with {#eql?}, so that equal instances work as the same Hash key.
    def hash
      [@proto, @user, @pwd, @host, @port, @path, @props].hash
    end

    # Loads the grammar - has to be done only once per Ruby VM start. Relies on loading the basics.
    def self.loadRulz
      Treetop.load(File.join(File.dirname(__FILE__), 'urlDataMeta'))
    end

    # Instance to textual. The user, the password and the property values are URL-encoded.
    def to_s
      return "file://#{@path}" if @proto == 'file'

      result = ''
      result << @proto << '://'
      result << URI.encode_www_form_component(@user) if @user
      result << ':' << URI.encode_www_form_component(@pwd) if @user && @pwd
      result << '@' if @user
      result << @host
      result << ':' << @port.to_s if @port
      result << '/' if @path || !@props.empty?
      result << @path if @path

      unless @props.empty?
        # a property without a value is rendered as the bare key
        result << '?' << @props.map { |k, v| v ? "#{k}=#{URI.encode_www_form_component(v)}" : "#{k}" }.join('&')
      end

      result
    end

    # Parses the source into the instance of the object.
    #
    # Anything starting with <tt>file://</tt> is taken as a file URI with the rest of the source for the path;
    # everything else goes through the +DataMetaUrlParser+, the grammar for which must be loaded beforehand.
    #
    # @param [String] source the source, the URI specification to parse into the instance of this class
    # @return [Uri] the parsed instance or +nil+ if the source does not match the grammar
    def self.parse(source)
      fileSignature = 'file://'
      if source.start_with?(fileSignature)
        return Uri.new('file', nil, nil, nil, nil, source[fileSignature.length..-1], {})
      end

      ast = DataMetaUrlParser.new.parse(source)
      return nil unless ast

      proto = ast.proto.text_value
      user = ast.user? ? ast.userPwd.user.text_value : nil
      pwd = ast.pwd? ? URI.decode_www_form_component(ast.userPwd.password) : nil
      host = ast.host.text_value
      port = ast.port? ? ast.port.number : nil
      path = ast.path? ? ast.uTail.path : nil
      query = ast.query? ? ast.uTail.query : nil
      props = {}
      if query
        query.split('&').each { |pairString|
          next if pairString.empty? # stray separator, as in "a=1&&b=2"
          # split on the first '=' only, the value may well contain a raw '='
          key, val = pairString.split('=', 2)
          # the value is nil for a key without a value, such as paramA in "&paramA&paramB=b"
          props[key] = (val.nil? || val.empty?) ? nil : URI.decode_www_form_component(val)
        }
      end
      Uri.new(proto, user, pwd, host, port, path, props)
    end

  end
end
|
@@ -0,0 +1,110 @@
|
|
1
|
+
grammar DataMetaUrl
|
2
|
+
|
3
|
+
# DataMeta URL definition, a part of a URL definition
|
4
|
+
|
5
|
+
include DataMetaCommonsRoot
|
6
|
+
|
7
|
+
rule dataMetaUri
|
8
|
+
proto:urlProtocol '://' userPwd:userSpec? host:hostName port:portSpec? uTail:urlTail? {
|
9
|
+
def user?
|
10
|
+
!userPwd.text_value.empty? && userPwd.name && !userPwd.name.empty?
|
11
|
+
end
|
12
|
+
def pwd?
|
13
|
+
!userPwd.text_value.empty? && userPwd.password && !userPwd.password.empty?
|
14
|
+
end
|
15
|
+
def port?
|
16
|
+
!port.text_value.empty?
|
17
|
+
end
|
18
|
+
|
19
|
+
def tail?
|
20
|
+
!uTail.text_value.empty?
|
21
|
+
end
|
22
|
+
|
23
|
+
def path?
|
24
|
+
!uTail.text_value.empty? && uTail.path
|
25
|
+
end
|
26
|
+
def path
|
27
|
+
uTail.path
|
28
|
+
end
|
29
|
+
def query?
|
30
|
+
!uTail.text_value.empty? && uTail.query
|
31
|
+
end
|
32
|
+
def query
|
33
|
+
uTail.query
|
34
|
+
end
|
35
|
+
}
|
36
|
+
end
|
37
|
+
|
38
|
+
# Order is important, if you put "http" in front of the "https", https match will fail
|
39
|
+
rule urlProtocol
|
40
|
+
'https' / 'http' / 'ftp' / 'hdfs' / 'mysql' / 'oracle'
|
41
|
+
end
|
42
|
+
|
43
|
+
rule urlTail
|
44
|
+
'/' uPath:urlPath? uQuery:urlQuery? {
|
45
|
+
def path
|
46
|
+
uPath.text_value.empty? ? nil : uPath.text_value
|
47
|
+
end
|
48
|
+
def query
|
49
|
+
uQuery.text_value.empty? ? nil : uQuery.query
|
50
|
+
end
|
51
|
+
}
|
52
|
+
end
|
53
|
+
|
54
|
+
rule urlPath
|
55
|
+
(urlPathChar+)
|
56
|
+
end
|
57
|
+
|
58
|
+
rule urlPathChar
|
59
|
+
wordChar / '-' / '/' / '.'
|
60
|
+
#!'?' # - this causes infinite loop
|
61
|
+
end
|
62
|
+
|
63
|
+
rule urlQuery
|
64
|
+
'?' queryText:(notBlanks+) {
|
65
|
+
def query
|
66
|
+
queryText.text_value.empty? ? nil : queryText.text_value
|
67
|
+
end
|
68
|
+
}
|
69
|
+
end
|
70
|
+
|
71
|
+
rule hostChar
|
72
|
+
wordChar / '-' / '.'
|
73
|
+
end
|
74
|
+
|
75
|
+
rule hostName
|
76
|
+
hostChar+
|
77
|
+
end
|
78
|
+
|
79
|
+
rule portSpec
|
80
|
+
':' portNumber:(digit+) {
|
81
|
+
def number # port number or nil if none, can not default it here because it depends on the protocol
|
82
|
+
text_value.empty? ? nil : portNumber.text_value.to_i
|
83
|
+
end
|
84
|
+
}
|
85
|
+
end
|
86
|
+
|
87
|
+
rule notAtSymbol
|
88
|
+
!'@' .
|
89
|
+
end
|
90
|
+
|
91
|
+
rule pwdSpec
|
92
|
+
':' pwd:(notAtSymbol+) {
|
93
|
+
def empty?
|
94
|
+
text_value.empty?
|
95
|
+
end
|
96
|
+
}
|
97
|
+
end
|
98
|
+
|
99
|
+
rule userSpec
|
100
|
+
user:(wordChar+) pwdOpt:pwdSpec? '@' {
|
101
|
+
def name
|
102
|
+
user.text_value.empty? ? nil : user.text_value
|
103
|
+
end
|
104
|
+
def password
|
105
|
+
pwdOpt.text_value.empty? ? nil : pwdOpt.pwd.text_value
|
106
|
+
end
|
107
|
+
}
|
108
|
+
end
|
109
|
+
|
110
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
grammar TestNumbers
|
2
|
+
|
3
|
+
include DataMetaCommonsRoot
|
4
|
+
rule testNumbers
|
5
|
+
singleDecDigitNoSign:decIntNoSign W mulDecDigitNoSign:decIntNoSign
|
6
|
+
W signableIntNoSign:decIntSignable W signableIntPlus:decIntSignable W signableIntMinus:decIntSignable
|
7
|
+
W singleDigSingleDigFrac:fixedDecimal
|
8
|
+
W singDigDoubleDigFrac:fixedDecimal
|
9
|
+
W doubleDigSingleDigFrac:fixedDecimal
|
10
|
+
W doubleDigDotFrac:fixedDecimal
|
11
|
+
W dotDigitsFrac:fixedDecimal
|
12
|
+
W singleDigSingleDigFracPlus:fixedDecimal
|
13
|
+
W singDigDoubleDigFracPlus:fixedDecimal
|
14
|
+
W doubleDigSingleDigFracPlus:fixedDecimal
|
15
|
+
W doubleDigDotFracPlus:fixedDecimal
|
16
|
+
W dotDigitsFracPlus:fixedDecimal
|
17
|
+
W singleDigSingleDigFracMinus:fixedDecimal
|
18
|
+
W singDigDoubleDigFracMinus:fixedDecimal
|
19
|
+
W doubleDigSingleDigFracMinus:fixedDecimal
|
20
|
+
W doubleDigDotFracMinus:fixedDecimal
|
21
|
+
W dotDigitsFracMinus:fixedDecimal
|
22
|
+
end
|
23
|
+
end
|
data/test/test_helper.rb
ADDED
@@ -0,0 +1,6 @@
|
|
1
|
+
## keep this underscore naming in the test subdir, it's easier to append files names to test
|
2
|
+
%w(stringio test/unit logger).each { |r| require r }
|
3
|
+
# this is expected to run from the project root, normally by the rake file
|
4
|
+
require './lib/dataMetaParse'
|
5
|
+
require './lib/dataMetaParse/uriDataMeta'
|
6
|
+
require './test/utils'
|
@@ -0,0 +1,52 @@
|
|
1
|
+
# keep this underscore naming in the test subdir, it's easier to append files names to test
|
2
|
+
require './test/utils'
|
3
|
+
|
4
|
+
# Unit test cases for the DataMetaParse
|
5
|
+
# See for instance:
|
6
|
+
# - test_full
|
7
|
+
# Assertions: http://ruby-doc.org/stdlib-1.9.3/libdoc/test/unit/rdoc/Test/Unit/Assertions.html
|
8
|
+
class TestNumberParse < Test::Unit::TestCase

  include DataMetaParseTestUtil
  L.info "Loaded Base Rules: #{BASE_RULS}"

  # Expected integers keyed by the label of the matching element in the testNumbers rule
  INT_EXPECTATIONS = {
      singleDecDigitNoSign: 1,
      mulDecDigitNoSign: 123,
      signableIntNoSign: 4321,
      signableIntPlus: 4321,
      signableIntMinus: -4321
  }

  # Expected fixed decimals keyed by the label of the matching element in the testNumbers rule
  FLOAT_EXPECTATIONS = {
      singleDigSingleDigFrac: 1.1,
      singDigDoubleDigFrac: 1.23,
      doubleDigSingleDigFrac: 12.3,
      doubleDigDotFrac: 12.0,
      dotDigitsFrac: 0.12,
      singleDigSingleDigFracPlus: 1.1,
      singDigDoubleDigFracPlus: 1.23,
      doubleDigSingleDigFracPlus: 12.3,
      doubleDigDotFracPlus: 12.0,
      dotDigitsFracPlus: 0.12,
      singleDigSingleDigFracMinus: -1.1,
      singDigDoubleDigFracMinus: -1.23,
      doubleDigSingleDigFracMinus: -12.3,
      doubleDigDotFracMinus: -12.0,
      dotDigitsFracMinus: -0.12
  }

  # Loads the numbers test grammar and creates a parser for it
  def setup
    loaded = Treetop.load('./test/numbers')
    L.info "Loaded numbers: #{loaded.inspect}"

    @parser = TestNumbersParser.new
    L.info "#{@parser.inspect}"
  end

  # Numbers parsing test: parses one line with a token per labeled element of the grammar,
  # then checks the text of each element converted to a number.
  def test_numbers
    input = %q<1 123 4321 +4321 -4321 1.1 1.23 12.3 12. .12 +1.1 +1.23 +12.3 +12. +.12 -1.1 -1.23 -12.3 -12. -.12>
    ast = DataMetaParse.parse(@parser, input)
    raise 'Numbers parse unsuccessful' unless ast
    raise ast if ast.is_a?(DataMetaParse::Err)
    L.info "AST:\n#{ast.inspect}"

    INT_EXPECTATIONS.each_pair do |label, expected|
      assert_equal(expected, ast.public_send(label).text_value.to_i)
    end
    FLOAT_EXPECTATIONS.each_pair do |label, expected|
      assert_equal(expected, ast.public_send(label).text_value.to_f)
    end
  end

end
|
@@ -0,0 +1,162 @@
|
|
1
|
+
# keep this underscore naming in the test subdir, it's easier to append files names to test
|
2
|
+
require './test/utils'
|
3
|
+
|
4
|
+
# Unit test cases for the DataMetaParse URI parser.
# See for instance:
#  - test_full
# Assertions: https://ruby-doc.org/stdlib-2.1.4/libdoc/test/unit/rdoc/Test/Unit/Assertions.html
#noinspection RubyStringKeysInHashInspection
class TestDataMetaParse < Test::Unit::TestCase

  include DataMetaParseTestUtil

  # Runs before every test: asks DataMetaParse::Uri to load its rules
  # (presumably the Treetop URI grammar -- loadRulz is defined outside this file).
  def setup
    DataMetaParse::Uri.loadRulz
  end

  # Checks one URI specification and logs the outcome.
  #
  # uriSpec  - the URI text handed to DataMetaParse::Uri.parse
  # expected - the DataMetaParse::Uri instance the parse result must equal
  #
  # The parsed components (or a "no match" note when parse returns a falsy value)
  # are written to the test log before the assertion, so a failure can be diagnosed
  # from the log alone.
  def assertUri(uriSpec, expected)
    uri = DataMetaParse::Uri.parse(uriSpec)
    if uri
      L.info %Q<parsed "#{uriSpec}"; protocol: #{uri.proto}, user: #{uri.user}, pwd=#{uri.pwd}, host:#{uri.host}> +
                 ", port=#{uri.port}, path:#{uri.path}, props: #{uri.props.inspect}\nre:#{uri}"
    else
      L.info %Q<parsed "#{uriSpec}", no match>
    end

    # assert_equal compares with ==, so this relies on DataMetaParse::Uri providing
    # value equality (implemented outside this file -- verify there if this fails oddly).
    assert_equal(expected, uri)
  end

  # Tests the general URI grammar.
  # Argument order of DataMetaParse::Uri.new: proto, user, pwd, host, port, path, props
  def test_GeneralUriGrammar
    assertUri(%q<http://www.domain.tld>, DataMetaParse::Uri.new('http', nil, nil, 'www.domain.tld', nil, nil, {}))
    assertUri(%q<http://www.domain.tld/>, DataMetaParse::Uri.new('http', nil, nil, 'www.domain.tld', nil, nil, {}))

    assertUri(%q<http://www.domain.tld:9090>,
              DataMetaParse::Uri.new('http', nil, nil, 'www.domain.tld', 9090, nil, {}))

    assertUri(%q<http://www.domain.tld:9090/>,
              DataMetaParse::Uri.new('http', nil, nil, 'www.domain.tld', 9090, nil, {}))

    assertUri(%q<http://joe_1@www.domain.tld>,
              DataMetaParse::Uri.new('http', 'joe_1', nil, 'www.domain.tld', nil, nil, {}))

    assertUri(%q<https://joe_1@www.domain.tld>,
              DataMetaParse::Uri.new('https', 'joe_1', nil, 'www.domain.tld', nil, nil, {}))

    assertUri(%q<http://joe_1:secret@www.domain.tld>,
              DataMetaParse::Uri.new('http', 'joe_1', 'secret', 'www.domain.tld', nil, nil, {}))

    # Percent-encoded characters in the password must come back decoded.
    assertUri(%q<https://joe_1:secr%26et@www.domain.tld>,
              DataMetaParse::Uri.new('https', 'joe_1', 'secr&et', 'www.domain.tld', nil, nil, {}))

    assertUri(%q<ftp://www.domain.tld/path/dir>,
              DataMetaParse::Uri.new('ftp', nil, nil, 'www.domain.tld', nil, 'path/dir', {}))

    assertUri(%q<http://www.domain.tld/path/dir>,
              DataMetaParse::Uri.new('http', nil, nil, 'www.domain.tld', nil, 'path/dir', {}))

    assertUri(%q<http://www.domain.tld/path/dir?qa=aVal&qb=bVal>,
              DataMetaParse::Uri.new('http', nil, nil, 'www.domain.tld', nil, 'path/dir',
                                     {'qa' => 'aVal', 'qb' => 'bVal'}))

    # A query key without "=value" is expected to map to nil.
    assertUri(%q<http://joe:secret@www.domain.tld/path/dir?qa=aVal&qb&qc=cVal>,
              DataMetaParse::Uri.new('http', 'joe', 'secret', 'www.domain.tld', nil, 'path/dir',
                                     {'qa' => 'aVal', 'qb' => nil, 'qc' => 'cVal'}))

    assertUri(%q<http://joe:secret@www.domain.tld/path/dir?qa=a%2FVal&qb=b%26Val&qc>,
              DataMetaParse::Uri.new('http', 'joe', 'secret', 'www.domain.tld', nil, 'path/dir',
                                     {'qa' => 'a/Val', 'qb' => 'b&Val', 'qc' => nil}))

    assertUri(%q<http://joe:secret@www.domain.tld:8443/path/dir?qa=a%2FVal&qb=b%26Val&qc>,
              DataMetaParse::Uri.new('http', 'joe', 'secret', 'www.domain.tld', 8443, 'path/dir',
                                     {'qa' => 'a/Val', 'qb' => 'b&Val', 'qc' => nil}))

    # file: URIs carry no host; three slashes yield an absolute path, two a relative one.
    assertUri(%q<file:///dir/otherDir/file.ext>,
              DataMetaParse::Uri.new('file', nil, nil, nil, nil, '/dir/otherDir/file.ext', {}))

    assertUri(%q<file://dir/otherDir/file.ext>,
              DataMetaParse::Uri.new('file', nil, nil, nil, nil, 'dir/otherDir/file.ext', {}))

    assertUri(%q<hdfs://node-geo-ss.vip.acme.com/dir/otherDir/file.ext?cluster=hadoopCluster&format=seqFile&blkSize=128M>,
              DataMetaParse::Uri.new('hdfs', nil, nil, 'node-geo-ss.vip.acme.com', nil, 'dir/otherDir/file.ext',
                                     {'cluster' => 'hadoopCluster', 'format' => 'seqFile', 'blkSize' => '128M'}))

    assertUri(%q<hdfs://node-geo-ss.vip.acme.com:8020/dir/otherDir/file.ext?cluster=hadoopCluster&format=seqFile&blkSize=128M>,
              DataMetaParse::Uri.new('hdfs', nil, nil, 'node-geo-ss.vip.acme.com', 8020, 'dir/otherDir/file.ext',
                                     {'cluster' => 'hadoopCluster', 'format' => 'seqFile', 'blkSize' => '128M'}))

    # Database URIs: the SQL statement travels percent-encoded in the "sql" property.
    assertUri(%q<mysql://DM_USER:DataMeta_PWD@db-host:3306/database?sql=select%20*%20from%20entity%20where%20id%20%3D%201>,
              DataMetaParse::Uri.new('mysql', 'DM_USER', 'DataMeta_PWD', 'db-host', 3306, 'database',
                                     {'sql' => 'select * from entity where id = 1'}))

    assertUri(%q<mysql://DM_USER:DataMeta_PWD@db-host/database?sql=select%20*%20from%20entity%20where%20id%20%3D%201>,
              DataMetaParse::Uri.new('mysql', 'DM_USER', 'DataMeta_PWD', 'db-host', nil, 'database',
                                     {'sql' => 'select * from entity where id = 1'}))

    assertUri(%q<mysql://DM_USER:DataMeta_PWD@db-host/database?sql=select%20*%20from%20entity%20where%20id%20%3D%201%20and%20c2%20%3D%20%27abc%27>,
              DataMetaParse::Uri.new('mysql', 'DM_USER', 'DataMeta_PWD', 'db-host', nil, 'database',
                                     {'sql' => %q<select * from entity where id = 1 and c2 = 'abc'>}))

    assertUri(%q<mysql://DM_USER:DataMeta_PWD@db-host/database?sql=select%20*%20from%20entity%20where%20id%20!%3D%201>,
              DataMetaParse::Uri.new('mysql', 'DM_USER', 'DataMeta_PWD', 'db-host', nil, 'database',
                                     {'sql' => %q|select * from entity where id != 1|}))

    assertUri(%q<mysql://DM_USER:DataMeta_PWD@db-host/database?sql=select%20*%20from%20entity%20where%20id%20%3C%3E%201>,
              DataMetaParse::Uri.new('mysql', 'DM_USER', 'DataMeta_PWD', 'db-host', nil, 'database',
                                     {'sql' => %q|select * from entity where id <> 1|}))

    assertUri(%q<mysql://DM_USER:DataMeta_PWD@db-host/database?sql=select%20*%20from%20entity%20where%20id%20%3E%3D%201>,
              DataMetaParse::Uri.new('mysql', 'DM_USER', 'DataMeta_PWD', 'db-host', nil, 'database',
                                     {'sql' => %q|select * from entity where id >= 1|}))

    assertUri(%q<mysql://DM_USER:DataMeta_PWD@db-host/database?sql=select%20*%20from%20database.entity%20where%20id%20%3E%3D%201>,
              DataMetaParse::Uri.new('mysql', 'DM_USER', 'DataMeta_PWD', 'db-host', nil, 'database',
                                     {'sql' => %q|select * from database.entity where id >= 1|}))

    assertUri(%q<mysql://DM_USER:DataMeta_PWD@db-host/database?sql=select%20*%20from%20entity%20where%20id%20in%20(1%2C2%2C3)>,
              DataMetaParse::Uri.new('mysql', 'DM_USER', 'DataMeta_PWD', 'db-host', nil, 'database',
                                     {'sql' => %q|select * from entity where id in (1,2,3)|}))

    assertUri(%q<oracle://DM_USER:DataMeta_PWD@db-host:3306/database?sql=select%20*%20from%20entity%20where%20id%20%3D%201>,
              DataMetaParse::Uri.new('oracle', 'DM_USER', 'DataMeta_PWD', 'db-host', 3306, 'database',
                                     {'sql' => 'select * from entity where id = 1'}))

    assertUri(%q<mysql://DM_USER:DataMeta_PWD@db-host/database?sql=select%20*%20from%20database.entity%20where%20id%20like%20%27%25a%25%27>,
              DataMetaParse::Uri.new('mysql', 'DM_USER', 'DataMeta_PWD', 'db-host', nil, 'database',
                                     {'sql' => %q|select * from database.entity where id like '%a%'|}))
  end

  # Checks bad URI specifications: building them must raise ArgumentError,
  # and parsing an hdfs URI without a namenode must yield nil.
  def test_badUris
    # A "file" URI must not carry a user...
    assert_raise(ArgumentError) do
      DataMetaParse::Uri.new('file', 'blah', nil, nil, nil, 'dir/otherDir/file.ext', {})
    end

    # ...nor a password...
    assert_raise(ArgumentError) do
      DataMetaParse::Uri.new('file', nil, 'blah', nil, nil, 'dir/otherDir/file.ext', {})
    end

    # ...nor a host...
    assert_raise(ArgumentError) do
      DataMetaParse::Uri.new('file', nil, nil, 'blah', nil, 'dir/otherDir/file.ext', {})
    end

    # ...nor a port.
    assert_raise(ArgumentError) do
      DataMetaParse::Uri.new('file', nil, nil, nil, 8080, 'dir/otherDir/file.ext', {})
    end

    assert_raise(ArgumentError) do # password but no user, no good:
      DataMetaParse::Uri.new('http', nil, 'secret', 'www.domain.tld', nil, 'path/dir',
                             {'qa' => 'aVal', 'qb' => 'bVal'})
    end

    # Hadoop Good Practice: disallow hdfs specifications without namenode
    assert_nil(DataMetaParse::Uri.parse('hdfs:///dir/otherDir/file.ext'))
  end
end
|
data/test/utils.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
|
2
|
+
# Shared helpers for the test suite: a file logger and the loaded base grammar.
module DataMetaParseTestUtil
  # Test logger writing to parseTests.log in the current working directory,
  # at DEBUG level with second-resolution timestamps.
  L = Logger.new('parseTests.log', 0, 10000).tap do |log|
    log.level = Logger::DEBUG
    log.datetime_format = '%Y-%m-%d %H:%M:%S'
  end

  # Result of loading the basic grammar via Treetop at module-definition time.
  # same as: DataMetaParse.loadBaseRulz
  BASE_RULS = Treetop.load('./lib/dataMetaParse/basic')
end
|
metadata
ADDED
@@ -0,0 +1,83 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: dataMetaParse
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Michael Bergens
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2017-01-15 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: treetop
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.6'
|
20
|
+
- - ">="
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: 1.6.8
|
23
|
+
type: :runtime
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: !ruby/object:Gem::Requirement
|
26
|
+
requirements:
|
27
|
+
- - "~>"
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '1.6'
|
30
|
+
- - ">="
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: 1.6.8
|
33
|
+
description: DataMeta Parser commons; common rules and some reusable grammars
|
34
|
+
email: michael.bergens@gmail.com
|
35
|
+
executables: []
|
36
|
+
extensions: []
|
37
|
+
extra_rdoc_files: []
|
38
|
+
files:
|
39
|
+
- ".yardopts"
|
40
|
+
- History.md
|
41
|
+
- PostInstall.txt
|
42
|
+
- README.md
|
43
|
+
- Rakefile
|
44
|
+
- lib/dataMetaParse.rb
|
45
|
+
- lib/dataMetaParse/basic.treetop
|
46
|
+
- lib/dataMetaParse/uri.treetop
|
47
|
+
- lib/dataMetaParse/uriDataMeta.rb
|
48
|
+
- lib/dataMetaParse/urlDataMeta.treetop
|
49
|
+
- test/numbers.treetop
|
50
|
+
- test/test_helper.rb
|
51
|
+
- test/test_numbers.rb
|
52
|
+
- test/test_uriParser.rb
|
53
|
+
- test/utils.rb
|
54
|
+
homepage: https://github.com/eBayDataMeta
|
55
|
+
licenses:
|
56
|
+
- Apache-2.0
|
57
|
+
metadata: {}
|
58
|
+
post_install_message:
|
59
|
+
rdoc_options: []
|
60
|
+
require_paths:
|
61
|
+
- lib
|
62
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
63
|
+
requirements:
|
64
|
+
- - ">="
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: 2.0.0
|
67
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
68
|
+
requirements:
|
69
|
+
- - ">="
|
70
|
+
- !ruby/object:Gem::Version
|
71
|
+
version: '0'
|
72
|
+
requirements:
|
73
|
+
- No specific requirements
|
74
|
+
rubyforge_project:
|
75
|
+
rubygems_version: 2.5.1
|
76
|
+
signing_key:
|
77
|
+
specification_version: 4
|
78
|
+
summary: DataMeta Parser commons
|
79
|
+
test_files:
|
80
|
+
- test/test_numbers.rb
|
81
|
+
- test/test_uriParser.rb
|
82
|
+
- test/numbers.treetop
|
83
|
+
- test/test_helper.rb
|