skeem 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 31d8e575d516bf4429e18814019ca54198bd1088
4
- data.tar.gz: 35a3d39adf7748b70c07cc1553a1b35ec94cd108
3
+ metadata.gz: a4521e994b28e81e3fa1f8852bf096c2e375def5
4
+ data.tar.gz: 86e7e0738987b88b9043740632b510e5bee6a49f
5
5
  SHA512:
6
- metadata.gz: a440baab7b58725760e00511c68c879fed77a6fc168fbafd89f361fa3fc3b49ba69cc5901a77438e4eacbcb2942679b6247d67ae02dec8e44d4145f003ed8eff
7
- data.tar.gz: 21457dcdd46a2cf2efc1ac4d184be50748429954f79de03480f1c6dd26094a08fda17c321cf231a7954bcac3c95593ff75a89238e0e1e97b23d69058a92ab68c
6
+ metadata.gz: 0197b9df68cba199a243b53d22f329728575ec2c8021b813f6f3eb4be4e39d530cfc53fa221be7994a636da06fdc1ad17977985d7d4e0c54dd3f4dda1c8b6cae
7
+ data.tar.gz: 493fb5dd6786dda700abf79ea211d96b832772540986b36eca887ddb2abba4816954080fb5508fdc55dc9ab4b375b5b129f031f507efcf9caf081e05b28ba10f
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ ## [0.0.2] - 2018-08-25
2
+ ### Changed
3
+ - Class `Tokenizer` improved: it now recognizes delimiters, booleans, integers, real numbers, strings, and identifiers.
4
+ - Spec file `Tokenizer_spec.rb` expanded with more tests.
5
+
1
6
  ## [0.0.1] - 2018-08-25
2
7
  ### Added
3
8
  - Initial `Tokenizer` class commit
data/README.md CHANGED
@@ -31,6 +31,9 @@ Roadmap:
31
31
  - Make it pass all examples from the [Reasoned Schemer](https://mitpress.mit.edu/books/reasoned-schemer-second-edition) book.
32
32
 
33
33
  TODO: Write usage instructions here
34
+
35
+ Good to know:
36
+ Online book: [The Scheme Programming Language (4th Ed.)](https://www.scheme.com/tspl4/)
34
37
 
35
38
  ## Development
36
39
 
@@ -30,7 +30,14 @@ module Skeem
30
30
  # Constructor. Initialize a tokenizer for Skeem.
31
31
  # @param source [String] Skeem text to tokenize.
32
32
  def initialize(source)
33
- @scanner = StringScanner.new(source)
33
+ @scanner = StringScanner.new('')
34
+ reinitialize(source)
35
+ end
36
+
37
+
38
+ # @param source [String] Skeem text to tokenize.
39
+ def reinitialize(source)
40
+ @scanner.string = source
34
41
  @lineno = 1
35
42
  @line_start = 0
36
43
  end
@@ -58,19 +65,30 @@ module Skeem
58
65
  if "()'`".include? curr_ch
59
66
  # Delimiters, separators => single character token
60
67
  token = build_token(@@lexeme2name[curr_ch], scanner.getch)
61
- elsif (lexeme = scanner.scan(/#(?:t|f|true|false)((?=\s|[|()";])|$)/))
68
+ elsif (lexeme = scanner.scan(/#(?:\.)(?=\s|[|()";]|$)/)) # Single char occurring alone
69
+ token = build_token('PERIOD', lexeme)
70
+ elsif (lexeme = scanner.scan(/#(?:t|f|true|false)(?=\s|[|()";]|$)/))
62
71
  token = build_token('BOOLEAN', lexeme) # normalized lexeme
63
- elsif (lexeme = scanner.scan(/[0-9]+((?=\s|[|()";])|$)/))
72
+ elsif (lexeme = scanner.scan(/[+-]?[0-9]+(?=\s|[|()";]|$)/))
64
73
  token = build_token('INTEGER', lexeme) # Decimal radix
65
- elsif (lexeme = scanner.scan(/-?[0-9]+(\.[0-9]+)?((?=\s|[|()";])|$)/))
74
+ elsif (lexeme = scanner.scan(/[+-]?[0-9]+\.[0-9]+(?:(?:e|E)[+-]?[0-9]+)?/))
66
75
  token = build_token('REAL', lexeme)
67
76
  elsif (lexeme = scanner.scan(/"(?:\\"|[^"])*"/)) # Double quotes literal?
68
77
  unquoted = lexeme.gsub(/(^")|("$)/, '')
69
78
  token = build_token('STRING_LIT', unquoted)
70
- elsif (lexeme = scanner.scan(/([\+\-])((?=\s|[|()";])|$)/))
71
- token = build_token('IDENTIFIER', lexeme) # Plus and minus as identifiers
72
79
  elsif (lexeme = scanner.scan(/[a-zA-Z!$%&*\/:<=>?@^_~][a-zA-Z0-9!$%&*+-.\/:<=>?@^_~+-]*/))
73
80
  token = build_token('IDENTIFIER', lexeme)
81
+ elsif (lexeme = scanner.scan(/\|(?:[^|])*\|/)) # Vertical bar delimited
82
+ token = build_token('IDENTIFIER', lexeme)
83
+ elsif (lexeme = scanner.scan(/([\+\-])((?=\s|[|()";])|$)/))
84
+ # R7RS peculiar identifiers case 1: isolated plus and minus as identifiers
85
+ token = build_token('IDENTIFIER', lexeme)
86
+ elsif (lexeme = scanner.scan(/[+-][a-zA-Z!$%&*\/:<=>?@^_~+-@][a-zA-Z0-9!$%&*+-.\/:<=>?@^_~+-]*/))
87
+ # R7RS peculiar identifiers case 2
88
+ token = build_token('IDENTIFIER', lexeme)
89
+ elsif (lexeme = scanner.scan(/\.[a-zA-Z!$%&*\/:<=>?@^_~+-@.][a-zA-Z0-9!$%&*+-.\/:<=>?@^_~+-]*/))
90
+ # R7RS peculiar identifiers case 4
91
+ token = build_token('IDENTIFIER', lexeme)
74
92
  else # Unknown token
75
93
  erroneous = curr_ch.nil? ? '' : scanner.scan(/./)
76
94
  sequel = scanner.scan(/.{1,20}/)
data/lib/skeem/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Skeem
2
- VERSION = '0.0.1'.freeze
2
+ VERSION = '0.0.2'.freeze
3
3
  end
@@ -10,7 +10,12 @@ module Skeem
10
10
  expect(token.lexeme).to eq(lexeme)
11
11
  end
12
12
  end
13
+
14
+ def unquoted(aString)
15
+ aString.gsub(/(^")|("$)/, '')
16
+ end
13
17
 
18
+ # Default instantiation
14
19
  subject { Tokenizer.new('') }
15
20
 
16
21
  context 'Initialization:' do
@@ -21,10 +26,11 @@ module Skeem
21
26
  it 'should have its scanner initialized' do
22
27
  expect(subject.scanner).to be_kind_of(StringScanner)
23
28
  end
24
-
29
+ end # context
30
+
25
31
  context 'Delimiter and separator token recognition:' do
26
32
  it 'should tokenize single char delimiters' do
27
- subject.scanner.string = "( ) ' `"
33
+ subject.reinitialize("( ) ' `")
28
34
  tokens = subject.tokens
29
35
  tokens.each { |token| expect(token).to be_kind_of(SToken) }
30
36
  terminals = tokens.map(&:terminal)
@@ -32,6 +38,122 @@ module Skeem
32
38
  expect(terminals).to eq(prediction)
33
39
  end
34
40
  end # context
41
+
42
+ context 'Boolean literals recognition:' do
43
+ it 'should tokenize boolean constants' do
44
+ tests = [
45
+ # couple [raw input, expected]
46
+ ['#t', '#t'],
47
+ [' #f', '#f'],
48
+ ['#true ', '#true'],
49
+ [' #false', '#false']
50
+ ]
51
+
52
+ tests.each do |(input, prediction)|
53
+ subject.reinitialize(input)
54
+ token = subject.tokens.first
55
+ expect(token.terminal).to eq('BOOLEAN')
56
+ expect(token.lexeme).to eq(prediction)
57
+ end
58
+ end
59
+ end # context
60
+
61
+ context 'Integer literals recognition:' do
62
+ it 'should tokenize integers in default radix 10' do
63
+ tests = [
64
+ # couple [raw input, expected]
65
+ ['0', '0'],
66
+ [' 3', '3'],
67
+ ['+3 ', '+3'],
68
+ ['-3', '-3'],
69
+ ['-1234', '-1234']
70
+ ]
71
+
72
+ tests.each do |(input, prediction)|
73
+ subject.reinitialize(input)
74
+ token = subject.tokens.first
75
+ expect(token.terminal).to eq('INTEGER')
76
+ expect(token.lexeme).to eq(prediction)
77
+ end
78
+ end
79
+ end # context
80
+
81
+ context 'Real number recognition:' do
82
+ it 'should tokenize real numbers' do
83
+ tests = [
84
+ # couple [raw input, expected]
85
+ ["\t\t3.45e+6", '3.45e+6'],
86
+ ['+3.45e+6', '+3.45e+6'],
87
+ ['-3.45e+6', '-3.45e+6']
88
+ ]
89
+
90
+ tests.each do |(input, prediction)|
91
+ subject.reinitialize(input)
92
+ token = subject.tokens.first
93
+ expect(token.terminal).to eq('REAL')
94
+ expect(token.lexeme).to eq(prediction)
95
+ end
96
+ end
97
+ end # context
98
+
99
+ context 'String recognition:' do
100
+ it 'should tokenize strings' do
101
+ examples = [
102
+ # Some examples taken from R7RS document
103
+ '"Hello world!"',
104
+ '"The word \"recursion\" has many meanings."'
105
+ ]
106
+
107
+ examples.each do |input|
108
+ # puts input
109
+ subject.reinitialize(input)
110
+ token = subject.tokens.first
111
+ expect(token.terminal).to eq('STRING_LIT')
112
+ expect(token.lexeme).to eq(unquoted(input))
113
+ end
114
+ end
115
+ end # context
116
+
117
+ =begin
118
+ For later:
119
+ "Another example:\ntwo lines of text"
120
+ "Here's text \
121
+ containing just one line"
122
+ "\x03B1; is named GREEK SMALL LETTER ALPHA."
123
+ =end
124
+
125
+ context 'Identifier recognition:' do
126
+ it 'should tokenize identifier' do
127
+ examples = [
128
+ # Examples taken from R7RS document
129
+ '...', '+', '+soup+', '<=?',
130
+ '->string', 'a34kTMNs', 'lambda',
131
+ 'list->vector', 'q', 'V17a',
132
+ '|two words|', '|two\x20;words|',
133
+ 'the-word-recursion-has-many-meanings'
134
+ ]
135
+
136
+ examples.each do |input|
137
+ subject.reinitialize(input)
138
+ token = subject.tokens.first
139
+ expect(token.terminal).to eq('IDENTIFIER')
140
+ expect(token.lexeme).to eq(input)
141
+ end
142
+ end
143
+ end # context
144
+
145
+ context 'Scanning Scheme sample code' do
146
+ it 'should read examples from lis.py page' do
147
+ source = <<-SCHEME
148
+ (if (> (val x) 0)
149
+ (fn (+ (aref A i) (* 3 i))
150
+ (quote (one two)))
151
+ end
152
+ end
153
+ SCHEME
154
+ subject.reinitialize(source)
155
+ expect { subject.tokens }.not_to raise_error
156
+ end
35
157
  end # context
36
158
  end # describe
37
159
  end # module
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: skeem
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dimitri Geshef