sneaql 0.0.13-java → 0.0.15-java

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,183 @@
1
module Sneaql
  module Core
    # NOTE(review): these tables are kept as class variables (rather than
    # frozen constants) in case other files that reopen Sneaql::Core read
    # them directly -- confirm before converting.
    @@valid_tokenizer_states = [
      :outside_word,
      :in_word,
      :in_string_literal,
      :in_string_literal_escape
    ]

    # these are the states that can be jumped between during tokenization.
    # @return [Array<Symbol>]
    def self.valid_tokenizer_states
      @@valid_tokenizer_states
    end

    # first key is the character classification, second key is the
    # current state. the value is the list of actions to run in order.
    # an action is one of :no_action, :new_token, :concat, :error, or
    # the name of a state to switch to.
    @@tokenizer_state_map = {
      whitespace: {
        outside_word: [:no_action],
        in_word: [:outside_word],
        in_string_literal: [:concat],
        # an escaped whitespace character must leave the escape state like
        # every other escaped character does, otherwise the next single
        # quote is treated as escaped and the literal never terminates.
        in_string_literal_escape: [:concat, :in_string_literal]
      },
      escape: {
        outside_word: [:error],
        in_word: [:error],
        # no :concat here, so the escape character itself is dropped
        in_string_literal: [:in_string_literal_escape],
        in_string_literal_escape: [:concat, :in_string_literal]
      },
      word: {
        outside_word: [:new_token, :concat, :in_word],
        in_word: [:concat],
        in_string_literal: [:concat],
        in_string_literal_escape: [:concat, :in_string_literal]
      },
      colon: {
        outside_word: [:new_token, :concat, :in_word],
        in_word: [:concat],
        in_string_literal: [:concat],
        in_string_literal_escape: [:concat, :in_string_literal]
      },
      singlequote: {
        outside_word: [:new_token, :concat, :in_string_literal],
        in_word: [:error],
        # closing quote is kept in the token, then the literal ends
        in_string_literal: [:concat, :outside_word],
        in_string_literal_escape: [:concat, :in_string_literal]
      },
      openbrace: {
        outside_word: [:new_token, :concat, :in_word],
        in_word: [:error],
        in_string_literal: [:concat],
        in_string_literal_escape: [:concat, :in_string_literal]
      },
      closebrace: {
        outside_word: [:error],
        in_word: [:concat],
        in_string_literal: [:concat],
        in_string_literal_escape: [:concat, :in_string_literal]
      },
      operator: {
        outside_word: [:new_token, :concat, :in_word],
        in_word: [:concat],
        in_string_literal: [:concat],
        in_string_literal_escape: [:concat, :in_string_literal]
      },
      nonword: {
        outside_word: [:new_token, :concat, :in_word],
        in_word: [:concat],
        in_string_literal: [:concat],
        in_string_literal_escape: [:concat, :in_string_literal]
      }
    }

    # state machine for use when iterating through the character
    # classifications of a given command. look up the character
    # classification and then the current state and you will receive
    # an array of actions to execute in sequence. these actions
    # include the ability to change state.
    # @return [Hash]
    def self.tokenizer_state_map
      @@tokenizer_state_map
    end

    # used to process a command string into an array of tokens.
    # the handling here is pretty basic and geared toward providing
    # string literal functionality.
    # a string literal is enclosed in single quotes, with backslash
    # as an escape character. escaping is meant for single quotes and
    # backslashes, but any character following a backslash is kept
    # literally.
    # this process does not interpret whether or not a token
    # is valid in any way, it only seeks to break it down reliably.
    # string literal tokens keep their enclosing single quotes. the
    # escape character itself is dropped from the token while the
    # character it escapes is kept.
    class Tokenizer
      # classifies a single character during lexical parsing.
      # the checks are ordered; the first matching pattern wins.
      # @param [String] input_char single character to classify
      # @return [Symbol, nil] classification for character, nil when
      #   nothing matches (e.g. an empty string)
      def classify(input_char)
        case input_char
        # whitespace delimits tokens not in string literal
        when /\s/ then :whitespace
        # escape character can escape itself
        when /\\/ then :escape
        # any word character
        when /\w/ then :word
        # colon is used to represent variables
        when /:/ then :colon
        # indicates start (or end) of string literal
        when /'/ then :singlequote
        # deprecated, old variable reference syntax
        when /\{/ then :openbrace
        when /\}/ then :closebrace
        # comparison operator chars
        when /[=><!]/ then :operator
        # any remaining non-word character
        when /\W/ then :nonword
        end
      end

      # returns an array with a classification for each character
      # in input string
      # @param [String] string
      # @return [Array<Symbol>] array of classification symbols
      def classify_all(string)
        string.each_char.map { |char| classify(char) }
      end

      # returns an array of tokens.
      # note that an unterminated string literal does not raise; whatever
      # was collected so far is returned as the final token.
      # @param [String] string command string to tokenize
      # @return [Array<String>] tokens in left to right order
      # @raise [RuntimeError] when the state map yields :error for a
      #   character in the current state
      def tokenize(string)
        state_map = Sneaql::Core.tokenizer_state_map
        valid_states = Sneaql::Core.valid_tokenizer_states

        # set initial state
        state = :outside_word

        # array to collect tokens
        tokens = []

        # will be rebuilt for each token. dup gives a mutable buffer even
        # if string literals are frozen in this file.
        current_token = ''.dup

        # pair each character with its classification (lexical analysis)
        string.each_char.zip(classify_all(string)).each do |char, classification|
          # the action list is chosen from the state at the time the
          # character is read; state changes apply to later characters
          state_map[classification][state].each do |action|
            case action
            when :no_action
              next
            when :new_token
              # rotate the current token if it is not empty string
              tokens << current_token unless current_token.empty?
              current_token = ''.dup
            when :concat
              # append in place instead of allocating a new string
              current_token << char
            when :error
              raise 'tokenization error'
            else
              # if the action is a state name, set the state
              state = action if valid_states.include?(action)
            end
          end
        end

        # close current token if not empty
        tokens << current_token unless current_token.empty?

        tokens
      end
    end
  end
end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sneaql
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.13
4
+ version: 0.0.15
5
5
  platform: java
6
6
  authors:
7
7
  - jeremy winters
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-04-16 00:00:00.000000000 Z
11
+ date: 2017-05-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -55,17 +55,17 @@ dependencies:
55
55
  - !ruby/object:Gem::Dependency
56
56
  requirement: !ruby/object:Gem::Requirement
57
57
  requirements:
58
- - - '='
58
+ - - ">="
59
59
  - !ruby/object:Gem::Version
60
- version: 0.0.4
60
+ version: 0.0.6
61
61
  name: jdbc_helpers
62
62
  prerelease: false
63
63
  type: :runtime
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
- - - '='
66
+ - - ">="
67
67
  - !ruby/object:Gem::Version
68
- version: 0.0.4
68
+ version: 0.0.6
69
69
  - !ruby/object:Gem::Dependency
70
70
  requirement: !ruby/object:Gem::Requirement
71
71
  requirements:
@@ -133,6 +133,7 @@ files:
133
133
  - lib/sneaql_lib/standard.rb
134
134
  - lib/sneaql_lib/standard_db_objects.rb
135
135
  - lib/sneaql_lib/step_manager.rb
136
+ - lib/sneaql_lib/tokenizer.rb
136
137
  homepage: https://www.full360.com
137
138
  licenses:
138
139
  - MIT