calyx 0.15.1 → 0.16.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +21 -7
- data/SYNTAX.md +200 -0
- data/lib/calyx/production/weighted_choices.rb +24 -5
- data/lib/calyx/version.rb +1 -1
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2dd4b07658e5e6d37f0b1c7a1f75afb08d61ae5f
|
4
|
+
data.tar.gz: 6cf84a9ef492e359ff297a84129abca738c4ca91
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c8bcd0a0eddc36040f008dc22de6d9df8b8f8e204e036c77f0efaceb226ab835888cfe997b4d9807f01e002b067de1f5b7750c1698711d3e0b05e92d3aac795f
|
7
|
+
data.tar.gz: fe055edd5d1062a6e55ab474ed9d923bc228ed276cb22dca3ce3c5c0cb218361208635bfeeff24112ca159f44d9914ac1ed535a1301f23c68bd73411f0b2fb5d
|
data/README.md
CHANGED
@@ -282,11 +282,19 @@ end
|
|
282
282
|
Basic rule substitution uses single curly brackets as delimiters for template expressions:
|
283
283
|
|
284
284
|
```ruby
|
285
|
-
|
285
|
+
fruit = Calyx::Grammar.new do
|
286
286
|
start '{colour} {fruit}'
|
287
287
|
colour 'red', 'green', 'yellow'
|
288
288
|
fruit 'apple', 'pear', 'tomato'
|
289
289
|
end
|
290
|
+
|
291
|
+
6.times { fruit.generate }
|
292
|
+
# => "yellow pear"
|
293
|
+
# => "red apple"
|
294
|
+
# => "green tomato"
|
295
|
+
# => "red pear"
|
296
|
+
# => "yellow tomato"
|
297
|
+
# => "green apple"
|
290
298
|
```
|
291
299
|
|
292
300
|
## String Modifiers
|
@@ -294,11 +302,12 @@ end
|
|
294
302
|
Dot-notation is supported in template expressions, allowing you to call any available method on the `String` object returned from a rule. Formatting methods can be chained arbitrarily and will execute in the same way as they would in native Ruby code.
|
295
303
|
|
296
304
|
```ruby
|
297
|
-
|
305
|
+
greeting = Calyx::Grammar.new do
|
298
306
|
start '{hello.capitalize} there.', 'Why, {hello} there.'
|
299
307
|
hello 'hello', 'hi'
|
300
308
|
end
|
301
309
|
|
310
|
+
4.times { greeting.generate }
|
302
311
|
# => "Hello there."
|
303
312
|
# => "Hi there."
|
304
313
|
# => "Why, hello there."
|
@@ -312,15 +321,16 @@ You can also extend the grammar with custom modifiers that provide useful format
|
|
312
321
|
Filters accept an input string and return the transformed output:
|
313
322
|
|
314
323
|
```ruby
|
315
|
-
|
324
|
+
greeting = Calyx::Grammar.new do
|
316
325
|
filter :shoutycaps do |input|
|
317
326
|
input.upcase
|
318
327
|
end
|
319
328
|
|
320
|
-
start '{hello.shoutycaps} there.', 'Why, {hello} there.'
|
329
|
+
start '{hello.shoutycaps} there.', 'Why, {hello.shoutycaps} there.'
|
321
330
|
hello 'hello', 'hi'
|
322
331
|
end
|
323
332
|
|
333
|
+
4.times { greeting.generate }
|
324
334
|
# => "HELLO there."
|
325
335
|
# => "HI there."
|
326
336
|
# => "Why, HELLO there."
|
@@ -332,12 +342,13 @@ end
|
|
332
342
|
The mapping shortcut allows you to specify a map of regex patterns pointing to their resulting substitution strings:
|
333
343
|
|
334
344
|
```ruby
|
335
|
-
|
345
|
+
green_bottle = Calyx::Grammar.new do
|
336
346
|
mapping :pluralize, /(.+)/ => '\\1s'
|
337
347
|
start 'One green {bottle}.', 'Two green {bottle.pluralize}.'
|
338
348
|
bottle 'bottle'
|
339
349
|
end
|
340
350
|
|
351
|
+
2.times { green_bottle.generate }
|
341
352
|
# => "One green bottle."
|
342
353
|
# => "Two green bottles."
|
343
354
|
```
|
@@ -355,12 +366,13 @@ module FullStop
|
|
355
366
|
end
|
356
367
|
end
|
357
368
|
|
358
|
-
|
369
|
+
hello = Calyx::Grammar.new do
|
359
370
|
modifier FullStop
|
360
371
|
start '{hello.capitalize.full_stop}'
|
361
372
|
hello 'hello'
|
362
373
|
end
|
363
374
|
|
375
|
+
hello.generate
|
364
376
|
# => "Hello."
|
365
377
|
```
|
366
378
|
|
@@ -395,11 +407,12 @@ class String
|
|
395
407
|
include FullStop
|
396
408
|
end
|
397
409
|
|
398
|
-
|
410
|
+
noun_articles = Calyx::Grammar.new do
|
399
411
|
start '{fruit.with_indefinite_article.capitalize.full_stop}'
|
400
412
|
fruit 'apple', 'orange', 'banana', 'pear'
|
401
413
|
end
|
402
414
|
|
415
|
+
4.times { noun_articles.generate }
|
403
416
|
# => "An apple."
|
404
417
|
# => "An orange."
|
405
418
|
# => "A banana."
|
@@ -507,6 +520,7 @@ Rough plan for stabilising the API and features for a `1.0` release.
|
|
507
520
|
| `0.14` | ~~Support for Ruby 2.4~~ |
|
508
521
|
| `0.15` | Options config and ‘strict mode’ error handling |
|
509
522
|
| `0.16` | Improve representation of weighted probability selection |
|
523
|
+
| `0.17` | Introduce wildcard syntax for meta rules (rules returning rules) |
|
510
524
|
|
511
525
|
## Credits
|
512
526
|
|
data/SYNTAX.md
ADDED
@@ -0,0 +1,200 @@
|
|
1
|
+
# Calyx Syntax Specification
|
2
|
+
|
3
|
+
> An ad-hoc, informally specified, bug-ridden, etc... etc...
|
4
|
+
|
5
|
+
## Background
|
6
|
+
|
7
|
+
Since `v0.11`, Calyx has supported loading grammars from external JSON files—a very similar format to Tracery<sup>[1][1]</sup>—but the precise syntax and structure used by these files was never properly documented or defined in a schema<sup>[2][2]</sup>.
|
8
|
+
|
9
|
+
This is worth documenting for several reasons:
|
10
|
+
|
11
|
+
1) It’s rather obvious that having good documentation will make it easier for new users to get started and for advanced users to learn about the limits of what they can do with the tool.
|
12
|
+
2) A well-defined schema reduces ambiguity and helps focus on authoring concerns, rather than drifting towards implementation concerns. This is currently a particular risk in Calyx because of the impedance mismatch between the Ruby DSL and JSON data.
|
13
|
+
3) A well-defined schema opens up potential for collaboration with authors of other similar tools and could help provide a future foundation for a standard data format that enables sharing grammars across languages and tools. This would be of particular benefit to authors, making it easier to build up reusable content libraries. It could also provide a foundation for new innovations in authoring UIs that aren’t tied to a specific language or tool.
|
14
|
+
|
15
|
+
## Format
|
16
|
+
|
17
|
+
### Files
|
18
|
+
|
19
|
+
External grammars are defined in JSON files. They must be encoded as `utf-8`, have a `.json` extension and conform to standard JSON syntax rules.
|
20
|
+
|
21
|
+
### Structure
|
22
|
+
|
23
|
+
#### Top Level
|
24
|
+
|
25
|
+
The top-level structure of the grammar must be a map/object-literal with each key representing a single left-hand rule symbol and the value representing the grammar productions for that rule:
|
26
|
+
|
27
|
+
```json
|
28
|
+
{
|
29
|
+
"start": "Colorless green ideas sleep furiously."
|
30
|
+
}
|
31
|
+
```
|
32
|
+
|
33
|
+
Empty grammars should be represented by an empty object:
|
34
|
+
|
35
|
+
```json
|
36
|
+
{}
|
37
|
+
```
|
38
|
+
|
39
|
+
#### Production Rules
|
40
|
+
|
41
|
+
Left-hand side rules must be string symbols conforming to the following pattern:
|
42
|
+
|
43
|
+
```ruby
|
44
|
+
/^[A-Za-z0-9_\-]+$/
|
45
|
+
```
|
46
|
+
|
47
|
+
Grammars are not context-sensitive<sup>[3][3]</sup>. Each left-hand side rule must be a direct symbol reference, not a production that can be expanded.
|
48
|
+
|
49
|
+
Right-hand side productions can be either single strings, arrays of strings or weighted probability objects.
|
50
|
+
|
51
|
+
Strings represent the template for a single choice that the production will always resolve to:
|
52
|
+
|
53
|
+
```json
|
54
|
+
{
|
55
|
+
"start": "Colorless green ideas sleep furiously."
|
56
|
+
}
|
57
|
+
```
|
58
|
+
|
59
|
+
Arrays of strings represent multiple choices that can produce any one of the possible output strings. Each string should have a (roughly) equal chance of being selected to expand to a result.
|
60
|
+
|
61
|
+
```json
|
62
|
+
{
|
63
|
+
"start": ["red", "green", "blue"]
|
64
|
+
}
|
65
|
+
```
|
66
|
+
|
67
|
+
Weighted probability objects represent a mapping of possible output strings to their probability of expanding to a result. The keys represent the possible output strings, and the values represent the probability of each string being selected.
|
68
|
+
|
69
|
+
Supported intervals are:
|
70
|
+
|
71
|
+
- 0..1 (`Number`)
|
72
|
+
|
73
|
+
The following example shows `red` with a 50% chance of being selected, and `green` and `blue` each with a 25% chance:
|
74
|
+
|
75
|
+
```json
|
76
|
+
{
|
77
|
+
"start": {
|
78
|
+
"red": 0.5,
|
79
|
+
"green": 0.25,
|
80
|
+
"blue": 0.25
|
81
|
+
}
|
82
|
+
}
|
83
|
+
```
|
84
|
+
|
85
|
+
#### Template Expansions
|
86
|
+
|
87
|
+
Productions can be recursively expanded by embedding rules using the template expression syntax, with the expressions delimited by `{` and `}` characters. Everything outside of the delimiters is treated as literal text.
|
88
|
+
|
89
|
+
Basic syntax:
|
90
|
+
|
91
|
+
```json
|
92
|
+
"{weather}"
|
93
|
+
```
|
94
|
+
|
95
|
+
Expanding a simple rule:
|
96
|
+
|
97
|
+
```json
|
98
|
+
{
|
99
|
+
"start": "The sky was {weather}.",
|
100
|
+
"weather": ["cloudy", "dark", "clear", "bright"]
|
101
|
+
}
|
102
|
+
```
|
103
|
+
|
104
|
+
A chain of nested expansions:
|
105
|
+
|
106
|
+
```json
|
107
|
+
{
|
108
|
+
"start": "{best} {worst}",
|
109
|
+
"best": "{twas} the {best_adj} of times.",
|
110
|
+
"worst": "{twas} the {worst_adj} of times.",
|
111
|
+
"twas": ["It was", "'Twas"],
|
112
|
+
"best_adj": ["best", "greatest"],
|
113
|
+
"worst_adj": ["worst", "most insufferable"]
|
114
|
+
}
|
115
|
+
```
|
116
|
+
|
117
|
+
#### Expression Modifiers
|
118
|
+
|
119
|
+
There are two different forms of expression modifiers—**Selection Modifiers** and **Output Modifiers**.
|
120
|
+
|
121
|
+
Selection modifiers apply to the grammar production itself, influencing how the rule is expanded. They are defined by prefixing a rule expression with a sigil that defines the behaviour of the selection.
|
122
|
+
|
123
|
+
```json
|
124
|
+
"{$unique_rule}"
|
125
|
+
"{@memoized_rule}"
|
126
|
+
```
|
127
|
+
|
128
|
+
Output modifiers format the string that is generated by the grammar production. They are defined by a chain of `.` separated references following the rule.
|
129
|
+
|
130
|
+
```json
|
131
|
+
"{formatted_rule.upcase}"
|
132
|
+
"{formatted_rule.downcase.capitalize}"
|
133
|
+
```
|
134
|
+
|
135
|
+
#### Unique Choices
|
136
|
+
|
137
|
+
Unique choices are prefixed with the `$` sigil in an expression.
|
138
|
+
|
139
|
+
This ensures that multiple references to the same production will always result in a unique value being chosen (until the choices in the production are exhausted).
|
140
|
+
|
141
|
+
```json
|
142
|
+
{
|
143
|
+
"start": "{$medal}. {$medal}. {$medal}.",
|
144
|
+
"medal": ["Gold", "Silver", "Bronze"]
|
145
|
+
}
|
146
|
+
```
|
147
|
+
|
148
|
+
```json
|
149
|
+
{
|
150
|
+
"start": "It was the {$adj} of times; it was the {$adj} of times.",
|
151
|
+
"adj": ["best", "worst"]
|
152
|
+
}
|
153
|
+
```
|
154
|
+
|
155
|
+
#### Memoized Choices
|
156
|
+
|
157
|
+
Memoized choices are prefixed with the `@` sigil in an expression.
|
158
|
+
|
159
|
+
This ensures that multiple references to the same production will always result in the first selected value being repeated.
|
160
|
+
|
161
|
+
```json
|
162
|
+
{
|
163
|
+
"start": "The {@pet} ran to join the other {@pet}s.",
|
164
|
+
"pet": ["cat", "dog"]
|
165
|
+
}
|
166
|
+
```
|
167
|
+
|
168
|
+
#### Output Modifiers
|
169
|
+
|
170
|
+
Due to their dependency on Ruby string methods and Calyx internals, output modifiers are currently a bit of a nightmare for interoperability.
|
171
|
+
|
172
|
+
All basic Ruby string formatting methods with arity 0 are supported by default<sup>[4][4]</sup>.
|
173
|
+
|
174
|
+
```json
|
175
|
+
"{my_rule.downcase}"
|
176
|
+
"{my_rule.upcase}"
|
177
|
+
"{my_rule.capitalize}"
|
178
|
+
"{my_rule.reverse}"
|
179
|
+
"{my_rule.swapcase}"
|
180
|
+
"{my_rule.strip}"
|
181
|
+
"{my_rule.lstrip}"
|
182
|
+
"{my_rule.rstrip}"
|
183
|
+
"{my_rule.succ}"
|
184
|
+
"{my_rule.chop}"
|
185
|
+
"{my_rule.chomp}"
|
186
|
+
```
|
187
|
+
|
188
|
+
The Ruby DSL provides a variety of methods for extending the supported range of modifiers. This behaviour currently won’t work at all when grammars are defined in JSON.
|
189
|
+
|
190
|
+
## References
|
191
|
+
|
192
|
+
[1]: http://tracery.io/
|
193
|
+
[2]: http://json-schema.org/
|
194
|
+
[3]: https://en.wikipedia.org/wiki/Context-sensitive_grammar
|
195
|
+
[4]: https://ruby-doc.org/core-2.4.0/String.html
|
196
|
+
|
197
|
+
1) http://tracery.io/
|
198
|
+
2) http://json-schema.org/
|
199
|
+
3) https://en.wikipedia.org/wiki/Context-sensitive_grammar
|
200
|
+
4) https://ruby-doc.org/core-2.4.0/String.html
|
@@ -10,13 +10,32 @@ module Calyx
|
|
10
10
|
# @param [Array<Array>, Hash<#to_s, Float>] productions
|
11
11
|
# @param [Calyx::Registry] registry
|
12
12
|
def self.parse(productions, registry)
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
if productions.first.last.is_a?(Range)
|
14
|
+
range_max = productions.max { |a,b| a.last.max <=> b.last.max }.last.max
|
15
|
+
|
16
|
+
weights_sum = productions.reduce(0) do |memo, choice|
|
17
|
+
memo += choice.last.size
|
18
|
+
end
|
19
|
+
|
20
|
+
if range_max != weights_sum
|
21
|
+
raise Errors::InvalidDefinition, "Weights must sum to total: #{range_max}"
|
22
|
+
end
|
16
23
|
|
17
|
-
|
24
|
+
normalized_productions = productions.map do |choice|
|
25
|
+
weight = choice.last.size / range_max.to_f
|
26
|
+
[choice.first, weight]
|
27
|
+
end
|
28
|
+
else
|
29
|
+
weights_sum = productions.reduce(0) do |memo, choice|
|
30
|
+
memo += choice.last
|
31
|
+
end
|
32
|
+
|
33
|
+
raise Errors::InvalidDefinition, 'Weights must sum to 1' if weights_sum != 1.0
|
34
|
+
|
35
|
+
normalized_productions = productions
|
36
|
+
end
|
18
37
|
|
19
|
-
choices =
|
38
|
+
choices = normalized_productions.map do |choice, weight|
|
20
39
|
if choice.is_a?(String)
|
21
40
|
[Concat.parse(choice, registry), weight]
|
22
41
|
elsif choice.is_a?(Symbol)
|
data/lib/calyx/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: calyx
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.16.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mark Rickerby
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-08-
|
11
|
+
date: 2017-08-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -66,6 +66,7 @@ files:
|
|
66
66
|
- Gemfile
|
67
67
|
- LICENSE
|
68
68
|
- README.md
|
69
|
+
- SYNTAX.md
|
69
70
|
- calyx.gemspec
|
70
71
|
- examples/any_gradient.rb
|
71
72
|
- examples/faker.rb
|
@@ -108,7 +109,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
108
109
|
version: '0'
|
109
110
|
requirements: []
|
110
111
|
rubyforge_project:
|
111
|
-
rubygems_version: 2.6.
|
112
|
+
rubygems_version: 2.6.13
|
112
113
|
signing_key:
|
113
114
|
specification_version: 4
|
114
115
|
summary: Generate text with declarative recursive grammars
|