sneaql 0.0.13-java → 0.0.15-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,183 @@
1
module Sneaql
  module Core
    # class variables are kept (rather than constants) so that any other
    # code reopening this module and reading them directly keeps working.
    @@valid_tokenizer_states = [
      :outside_word,
      :in_word,
      :in_string_literal,
      :in_string_literal_escape
    ]

    # these are the states that can be jumped between during tokenization.
    # @return [Array<Symbol>]
    def self.valid_tokenizer_states
      @@valid_tokenizer_states
    end

    # outer key: character classification (see Tokenizer#classify).
    # inner key: current tokenizer state.
    # value: ordered list of actions. an action is one of :no_action,
    # :new_token, :concat, :error, or the name of the state to switch to.
    @@tokenizer_state_map = {
      whitespace: {
        outside_word: [:no_action],
        in_word: [:outside_word],
        in_string_literal: [:concat],
        # an escape lasts for exactly one character, so an escaped
        # whitespace character must drop back to :in_string_literal
        # just like every other classification does. without this the
        # quote that follows would be treated as escaped.
        in_string_literal_escape: [:concat, :in_string_literal]
      },
      escape: {
        outside_word: [:error],
        in_word: [:error],
        # no :concat here, so the escaping backslash itself is not
        # copied into the token.
        in_string_literal: [:in_string_literal_escape],
        in_string_literal_escape: [:concat, :in_string_literal]
      },
      word: {
        outside_word: [:new_token, :concat, :in_word],
        in_word: [:concat],
        in_string_literal: [:concat],
        in_string_literal_escape: [:concat, :in_string_literal]
      },
      colon: {
        outside_word: [:new_token, :concat, :in_word],
        in_word: [:concat],
        in_string_literal: [:concat],
        in_string_literal_escape: [:concat, :in_string_literal]
      },
      singlequote: {
        outside_word: [:new_token, :concat, :in_string_literal],
        in_word: [:error],
        # closing quote: keep it in the token, then leave the literal
        in_string_literal: [:concat, :outside_word],
        in_string_literal_escape: [:concat, :in_string_literal]
      },
      openbrace: {
        outside_word: [:new_token, :concat, :in_word],
        in_word: [:error],
        in_string_literal: [:concat],
        in_string_literal_escape: [:concat, :in_string_literal]
      },
      closebrace: {
        outside_word: [:error],
        in_word: [:concat],
        in_string_literal: [:concat],
        in_string_literal_escape: [:concat, :in_string_literal]
      },
      operator: {
        outside_word: [:new_token, :concat, :in_word],
        in_word: [:concat],
        in_string_literal: [:concat],
        in_string_literal_escape: [:concat, :in_string_literal]
      },
      nonword: {
        outside_word: [:new_token, :concat, :in_word],
        in_word: [:concat],
        in_string_literal: [:concat],
        in_string_literal_escape: [:concat, :in_string_literal]
      }
    }

    # state machine for use when iterating through the character
    # classifications of a given command. look up the character
    # classification, then the current state, and you will receive an
    # array of actions to execute in sequence. these actions
    # include the ability to change state.
    # @return [Hash]
    def self.tokenizer_state_map
      @@tokenizer_state_map
    end

    # used to process a command string into an array of tokens.
    # the handling here is pretty basic and geared toward providing
    # string literal functionality.
    # a string literal is enclosed in single quotes, with backslash
    # as an escape character. the character following a backslash
    # is always taken literally (this is how a single quote or a
    # backslash is placed inside a literal).
    # this process does not interpret whether or not a token
    # is valid in any way, it only seeks to break it down reliably.
    # string literal tokens keep their enclosing single quotes.
    # the backslash that introduces an escape is not copied into
    # the token, only the escaped character is.
    # a literal that is still open at the end of the input is not
    # reported as an error, the partial token is returned as is.
    class Tokenizer
      # classifies a single character during lexical parsing.
      # the checks are ordered, the first pattern that matches wins.
      # @param [String] input_char single character to classify
      # @return [Symbol, nil] classification for character, nil when
      #   no pattern matches (an empty string)
      def classify(input_char)
        case input_char
        when /\s/ then :whitespace   # delimits tokens outside string literals
        when /\\/ then :escape       # escape character, can escape itself
        when /\w/ then :word         # any word character
        when /:/ then :colon         # colon is used to represent variables
        when /'/ then :singlequote   # start or end of a string literal
        when /\{/ then :openbrace    # deprecated, old variable reference syntax
        when /\}/ then :closebrace
        when /[=><!]/ then :operator # comparison operator chars
        when /\W/ then :nonword      # any remaining non-word character
        end
      end

      # returns an array with a classification for each character
      # in input string
      # @param [String] string
      # @return [Array<Symbol>] array of classification symbols
      def classify_all(string)
        string.split('').map { |char| classify(char) }
      end

      # returns an array of tokens.
      # @param [String] string command string to tokenize
      # @return [Array<String>] tokens in left to right order
      # @raise [RuntimeError] when a character is not allowed in the
      #   current state (the state map yields :error)
      def tokenize(string)
        # perform lexical analysis up front, one classification per character
        classified = classify_all(string)
        characters = string.split('')

        state_map = Sneaql::Core.tokenizer_state_map
        valid_states = Sneaql::Core.valid_tokenizer_states

        # set initial state
        state = :outside_word

        # array to collect tokens
        tokens = []

        # will be rebuilt for each token
        current_token = ''

        characters.zip(classified).each do |char, classification|
          # perform the actions appropriate to character
          # classification and current state, in order
          state_map[classification][state].each do |action|
            case action
            when :no_action
              nil
            when :new_token
              # rotate the current token if it is not empty string
              tokens << current_token unless current_token == ''
              current_token = ''
            when :concat
              # concatenate current character to current token
              current_token += char
            when :error
              raise 'tokenization error'
            else
              # if the action is a state name, set the state.
              # anything else is ignored.
              state = action if valid_states.include?(action)
            end
          end
        end

        # close current token if not empty
        tokens << current_token unless current_token == ''

        # return array of tokens
        tokens
      end
    end
  end
end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sneaql
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.13
4
+ version: 0.0.15
5
5
  platform: java
6
6
  authors:
7
7
  - jeremy winters
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-04-16 00:00:00.000000000 Z
11
+ date: 2017-05-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -55,17 +55,17 @@ dependencies:
55
55
  - !ruby/object:Gem::Dependency
56
56
  requirement: !ruby/object:Gem::Requirement
57
57
  requirements:
58
- - - '='
58
+ - - ">="
59
59
  - !ruby/object:Gem::Version
60
- version: 0.0.4
60
+ version: 0.0.6
61
61
  name: jdbc_helpers
62
62
  prerelease: false
63
63
  type: :runtime
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
- - - '='
66
+ - - ">="
67
67
  - !ruby/object:Gem::Version
68
- version: 0.0.4
68
+ version: 0.0.6
69
69
  - !ruby/object:Gem::Dependency
70
70
  requirement: !ruby/object:Gem::Requirement
71
71
  requirements:
@@ -133,6 +133,7 @@ files:
133
133
  - lib/sneaql_lib/standard.rb
134
134
  - lib/sneaql_lib/standard_db_objects.rb
135
135
  - lib/sneaql_lib/step_manager.rb
136
+ - lib/sneaql_lib/tokenizer.rb
136
137
  homepage: https://www.full360.com
137
138
  licenses:
138
139
  - MIT