skeem 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 31d8e575d516bf4429e18814019ca54198bd1088
4
- data.tar.gz: 35a3d39adf7748b70c07cc1553a1b35ec94cd108
3
+ metadata.gz: a4521e994b28e81e3fa1f8852bf096c2e375def5
4
+ data.tar.gz: 86e7e0738987b88b9043740632b510e5bee6a49f
5
5
  SHA512:
6
- metadata.gz: a440baab7b58725760e00511c68c879fed77a6fc168fbafd89f361fa3fc3b49ba69cc5901a77438e4eacbcb2942679b6247d67ae02dec8e44d4145f003ed8eff
7
- data.tar.gz: 21457dcdd46a2cf2efc1ac4d184be50748429954f79de03480f1c6dd26094a08fda17c321cf231a7954bcac3c95593ff75a89238e0e1e97b23d69058a92ab68c
6
+ metadata.gz: 0197b9df68cba199a243b53d22f329728575ec2c8021b813f6f3eb4be4e39d530cfc53fa221be7994a636da06fdc1ad17977985d7d4e0c54dd3f4dda1c8b6cae
7
+ data.tar.gz: 493fb5dd6786dda700abf79ea211d96b832772540986b36eca887ddb2abba4816954080fb5508fdc55dc9ab4b375b5b129f031f507efcf9caf081e05b28ba10f
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ ## [0.0.2] - 2018-08-25
2
+ ### Changed
3
+ - Class `Tokenizer` improved; it now recognizes delimiters, booleans, integers, real numbers, strings, and identifiers.
4
+ - Spec file `Tokenizer_spec.rb` expanded with more tests.
5
+
1
6
  ## [0.0.1] - 2018-08-25
2
7
  ### Added
3
8
  - Initial `Tokenizer` class commit
data/README.md CHANGED
@@ -31,6 +31,9 @@ Roadmap:
31
31
  - Make it pass all examples from the [Reasoned Schemer](https://mitpress.mit.edu/books/reasoned-schemer-second-edition) book.
32
32
 
33
33
  TODO: Write usage instructions here
34
+
35
+ Good to know:
36
+ Online book: [The Scheme Programming Language (4th Ed.)](https://www.scheme.com/tspl4/)
34
37
 
35
38
  ## Development
36
39
 
@@ -30,7 +30,14 @@ module Skeem
30
30
  # Constructor. Initialize a tokenizer for Skeem.
31
31
  # @param source [String] Skeem text to tokenize.
32
32
  def initialize(source)
33
- @scanner = StringScanner.new(source)
33
+ @scanner = StringScanner.new('')
34
+ reinitialize(source)
35
+ end
36
+
37
+
38
+ # @param source [String] Skeem text to tokenize.
39
+ def reinitialize(source)
40
+ @scanner.string = source
34
41
  @lineno = 1
35
42
  @line_start = 0
36
43
  end
@@ -58,19 +65,30 @@ module Skeem
58
65
  if "()'`".include? curr_ch
59
66
  # Delimiters, separators => single character token
60
67
  token = build_token(@@lexeme2name[curr_ch], scanner.getch)
61
- elsif (lexeme = scanner.scan(/#(?:t|f|true|false)((?=\s|[|()";])|$)/))
68
+ elsif (lexeme = scanner.scan(/#(?:\.)(?=\s|[|()";]|$)/)) # Single char occurring alone
69
+ token = build_token('PERIOD', lexeme)
70
+ elsif (lexeme = scanner.scan(/#(?:t|f|true|false)(?=\s|[|()";]|$)/))
62
71
  token = build_token('BOOLEAN', lexeme) # normalized lexeme
63
- elsif (lexeme = scanner.scan(/[0-9]+((?=\s|[|()";])|$)/))
72
+ elsif (lexeme = scanner.scan(/[+-]?[0-9]+(?=\s|[|()";]|$)/))
64
73
  token = build_token('INTEGER', lexeme) # Decimal radix
65
- elsif (lexeme = scanner.scan(/-?[0-9]+(\.[0-9]+)?((?=\s|[|()";])|$)/))
74
+ elsif (lexeme = scanner.scan(/[+-]?[0-9]+\.[0-9]+(?:(?:e|E)[+-]?[0-9]+)?/))
66
75
  token = build_token('REAL', lexeme)
67
76
  elsif (lexeme = scanner.scan(/"(?:\\"|[^"])*"/)) # Double quotes literal?
68
77
  unquoted = lexeme.gsub(/(^")|("$)/, '')
69
78
  token = build_token('STRING_LIT', unquoted)
70
- elsif (lexeme = scanner.scan(/([\+\-])((?=\s|[|()";])|$)/))
71
- token = build_token('IDENTIFIER', lexeme) # Plus and minus as identifiers
72
79
  elsif (lexeme = scanner.scan(/[a-zA-Z!$%&*\/:<=>?@^_~][a-zA-Z0-9!$%&*+-.\/:<=>?@^_~+-]*/))
73
80
  token = build_token('IDENTIFIER', lexeme)
81
+ elsif (lexeme = scanner.scan(/\|(?:[^|])*\|/)) # Vertical bar delimited
82
+ token = build_token('IDENTIFIER', lexeme)
83
+ elsif (lexeme = scanner.scan(/([\+\-])((?=\s|[|()";])|$)/))
84
+ # # R7RS peculiar identifiers case 1: isolated plus and minus as identifiers
85
+ token = build_token('IDENTIFIER', lexeme)
86
+ elsif (lexeme = scanner.scan(/[+-][a-zA-Z!$%&*\/:<=>?@^_~+-@][a-zA-Z0-9!$%&*+-.\/:<=>?@^_~+-]*/))
87
+ # R7RS peculiar identifiers case 2
88
+ token = build_token('IDENTIFIER', lexeme)
89
+ elsif (lexeme = scanner.scan(/\.[a-zA-Z!$%&*\/:<=>?@^_~+-@.][a-zA-Z0-9!$%&*+-.\/:<=>?@^_~+-]*/))
90
+ # R7RS peculiar identifiers case 4
91
+ token = build_token('IDENTIFIER', lexeme)
74
92
  else # Unknown token
75
93
  erroneous = curr_ch.nil? ? '' : scanner.scan(/./)
76
94
  sequel = scanner.scan(/.{1,20}/)
data/lib/skeem/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Skeem
2
- VERSION = '0.0.1'.freeze
2
+ VERSION = '0.0.2'.freeze
3
3
  end
@@ -10,7 +10,12 @@ module Skeem
10
10
  expect(token.lexeme).to eq(lexeme)
11
11
  end
12
12
  end
13
+
14
+ def unquoted(aString)
15
+ aString.gsub(/(^")|("$)/, '')
16
+ end
13
17
 
18
+ # Default instantiation
14
19
  subject { Tokenizer.new('') }
15
20
 
16
21
  context 'Initialization:' do
@@ -21,10 +26,11 @@ module Skeem
21
26
  it 'should have its scanner initialized' do
22
27
  expect(subject.scanner).to be_kind_of(StringScanner)
23
28
  end
24
-
29
+ end # context
30
+
25
31
  context 'Delimiter and separator token recognition:' do
26
32
  it 'should tokenize single char delimiters' do
27
- subject.scanner.string = "( ) ' `"
33
+ subject.reinitialize("( ) ' `")
28
34
  tokens = subject.tokens
29
35
  tokens.each { |token| expect(token).to be_kind_of(SToken) }
30
36
  terminals = tokens.map(&:terminal)
@@ -32,6 +38,122 @@ module Skeem
32
38
  expect(terminals).to eq(prediction)
33
39
  end
34
40
  end # context
41
+
42
+ context 'Boolean literals recognition:' do
43
+ it 'should tokenize boolean constants' do
44
+ tests = [
45
+ # couple [raw input, expected]
46
+ ['#t', '#t'],
47
+ [' #f', '#f'],
48
+ ['#true ', '#true'],
49
+ [' #false', '#false']
50
+ ]
51
+
52
+ tests.each do |(input, prediction)|
53
+ subject.reinitialize(input)
54
+ token = subject.tokens.first
55
+ expect(token.terminal).to eq('BOOLEAN')
56
+ expect(token.lexeme).to eq(prediction)
57
+ end
58
+ end
59
+ end # context
60
+
61
+ context 'Integer literals recognition:' do
62
+ it 'should tokenize integers in default radix 10' do
63
+ tests = [
64
+ # couple [raw input, expected]
65
+ ['0', '0'],
66
+ [' 3', '3'],
67
+ ['+3 ', '+3'],
68
+ ['-3', '-3'],
69
+ ['-1234', '-1234']
70
+ ]
71
+
72
+ tests.each do |(input, prediction)|
73
+ subject.reinitialize(input)
74
+ token = subject.tokens.first
75
+ expect(token.terminal).to eq('INTEGER')
76
+ expect(token.lexeme).to eq(prediction)
77
+ end
78
+ end
79
+ end # context
80
+
81
+ context 'Real number recognition:' do
82
+ it 'should tokenize real numbers' do
83
+ tests = [
84
+ # couple [raw input, expected]
85
+ ["\t\t3.45e+6", '3.45e+6'],
86
+ ['+3.45e+6', '+3.45e+6'],
87
+ ['-3.45e+6', '-3.45e+6']
88
+ ]
89
+
90
+ tests.each do |(input, prediction)|
91
+ subject.reinitialize(input)
92
+ token = subject.tokens.first
93
+ expect(token.terminal).to eq('REAL')
94
+ expect(token.lexeme).to eq(prediction)
95
+ end
96
+ end
97
+ end # context
98
+
99
+ context 'String recognition:' do
100
+ it 'should tokenize strings' do
101
+ examples = [
102
+ # Some examples taken from R7RS document
103
+ '"Hello world!"',
104
+ '"The word \"recursion\" has many meanings."'
105
+ ]
106
+
107
+ examples.each do |input|
108
+ # puts input
109
+ subject.reinitialize(input)
110
+ token = subject.tokens.first
111
+ expect(token.terminal).to eq('STRING_LIT')
112
+ expect(token.lexeme).to eq(unquoted(input))
113
+ end
114
+ end
115
+ end # context
116
+
117
+ =begin
118
+ For later:
119
+ "Another example:\ntwo lines of text"
120
+ "Here's text \
121
+ containing just one line"
122
+ "\x03B1; is named GREEK SMALL LETTER ALPHA."
123
+ =end
124
+
125
+ context 'Identifier recognition:' do
126
+ it 'should tokenize identifier' do
127
+ examples = [
128
+ # Examples taken from R7RS document
129
+ '...', '+', '+soup+', '<=?',
130
+ '->string', 'a34kTMNs', 'lambda',
131
+ 'list->vector', 'q', 'V17a',
132
+ '|two words|', '|two\x20;words|',
133
+ 'the-word-recursion-has-many-meanings'
134
+ ]
135
+
136
+ examples.each do |input|
137
+ subject.reinitialize(input)
138
+ token = subject.tokens.first
139
+ expect(token.terminal).to eq('IDENTIFIER')
140
+ expect(token.lexeme).to eq(input)
141
+ end
142
+ end
143
+ end # context
144
+
145
+ context 'Scanning Scheme sample code' do
146
+ it 'should read examples from lis.py page' do
147
+ source = <<-SCHEME
148
+ (if (> (val x) 0)
149
+ (fn (+ (aref A i) (* 3 i))
150
+ (quote (one two)))
151
+ end
152
+ end
153
+ SCHEME
154
+ subject.reinitialize(source)
155
+ expect { subject.tokens }.not_to raise_error
156
+ end
35
157
  end # context
36
158
  end # describe
37
159
  end # module
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: skeem
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dimitri Geshef