correspondence-markup 0.3.0 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
@@ -5,11 +5,18 @@ require 'correspondence-markup/bracketed-grammar'
|
|
5
5
|
|
6
6
|
module CorrespondenceMarkup
|
7
7
|
|
8
|
+
# Compiler that parses and compiles correspondence markup source code
|
9
|
+
# into an array of StructureGroup objects (from which HTML can be
|
10
|
+
# generated in the format required by correspondence.js)
|
8
11
|
class CorrespondenceMarkupCompiler
|
12
|
+
|
13
|
+
# initialize by creating the CorrespondenceMarkupLanguageParser (defined by the Treetop source)
|
9
14
|
def initialize
|
10
15
|
@parser = CorrespondenceMarkupLanguageParser.new
|
11
16
|
end
|
12
17
|
|
18
|
+
# compile source code into an array of StructureGroups,
|
19
|
+
# throwing an exception if there is a parse error
|
13
20
|
def compile_structure_groups(markup)
|
14
21
|
syntax_tree = @parser.parse(markup, root: :structure_groups)
|
15
22
|
if(syntax_tree.nil?)
|
@@ -19,8 +26,4 @@ module CorrespondenceMarkup
|
|
19
26
|
end
|
20
27
|
end
|
21
28
|
|
22
|
-
def self.sayHello
|
23
|
-
"hello"
|
24
|
-
end
|
25
|
-
|
26
29
|
end
|
@@ -1,21 +1,49 @@
|
|
1
1
|
require "correspondence-markup/types"
|
2
2
|
|
3
|
+
# Grammar for a markup language which can be compiled into HTML format
|
4
|
+
# required by correspondence.js
|
5
|
+
|
6
|
+
# General note on bracketing of sequences - different components are enclosed
|
7
|
+
# by different types of bracket, in particular:
|
8
|
+
#
|
9
|
+
# * item: "[]"
|
10
|
+
# * item-group: "[]"
|
11
|
+
# * structure: "{}"
|
12
|
+
# * structure group: "()"
|
13
|
+
#
|
14
|
+
# However, in anticipation of a UI where the user may choose the granularity
|
15
|
+
# at which to edit components of particular content, the parsing of brackets
|
16
|
+
# is handled by the parent component, e.g. the "{}" bracketing of structures
|
17
|
+
# is specified in the grammar rule for structure_group.
|
18
|
+
# For example, if a user is editing a structure definition in a UI text area,
|
19
|
+
# there should be no necessity for the user to enter the enclosing "{}" brackets,
|
20
|
+
# because the UI text area implicitly encloses the definition that the user is editing.
|
21
|
+
|
3
22
|
grammar CorrespondenceMarkupLanguage
|
4
23
|
|
24
|
+
# Include the Module containing Ruby classes representing the AST nodes
|
5
25
|
include CorrespondenceMarkup
|
6
26
|
|
27
|
+
# This rule defines a sequence of structure groups.
|
28
|
+
# Individual structure groups are independent of each other
|
29
|
+
# (although typically they will be a sequence of structure groups
|
30
|
+
# where each structure group has structures in the same sequence of languages).
|
7
31
|
rule structure_groups
|
8
32
|
s groups:("(" structure_group ")" s)*
|
9
33
|
{
|
34
|
+
# Return an array of StructureGroup objects
|
10
35
|
def value
|
11
36
|
groups.elements.map {|e| e.structure_group.value}
|
12
37
|
end
|
13
38
|
}
|
14
39
|
end
|
15
40
|
|
41
|
+
# A structure group is a group of structures where each structure represents
|
42
|
+
# the same information in a different "language".
|
16
43
|
rule structure_group
|
17
44
|
s description:structure_group_description? s structures:("{" structure "}" s)*
|
18
45
|
{
|
46
|
+
# Return a StructureGroup
|
19
47
|
def value
|
20
48
|
structureObjects = structures.elements.map {|e| e.structure.value}
|
21
49
|
CorrespondenceMarkup::StructureGroup.new(structureObjects)
|
@@ -23,13 +51,23 @@ grammar CorrespondenceMarkupLanguage
|
|
23
51
|
}
|
24
52
|
end
|
25
53
|
|
54
|
+
# Optional lengthy description of a particular structure group.
|
55
|
+
# (Intended to be displayed as the title of the structure group
|
56
|
+
# describing the information presented in the structure group.)
|
26
57
|
rule structure_group_description
|
27
58
|
"#" s [^{\n]* "\n"
|
28
59
|
end
|
29
60
|
|
61
|
+
# A structure is one of two or more structures in a structure group
|
62
|
+
# (although the grammar does not impose any count restriction, in anticipation
|
63
|
+
# of application users editing and saving incomplete content).
|
64
|
+
# A structure has a "type" (short language description intended to map to a CSS class),
|
65
|
+
# a "description" (longer but still concise language description for display to the reader)
|
66
|
+
# and a sequence of "item groups".
|
30
67
|
rule structure
|
31
68
|
structure_annotation s itemGroups:("[" item_group "]" s)*
|
32
69
|
{
|
70
|
+
# Return a Structure
|
33
71
|
def value
|
34
72
|
itemGroupObjects = itemGroups.elements.map {|e| e.item_group.value}
|
35
73
|
class_name, description = structure_annotation.value
|
@@ -38,14 +76,17 @@ grammar CorrespondenceMarkupLanguage
|
|
38
76
|
end
|
39
77
|
}
|
40
78
|
end
|
41
|
-
|
79
|
+
|
80
|
+
# Structure class (for the structure's "type"), with rules similar to those of a CSS class identifier.
|
42
81
|
rule structure_class
|
43
82
|
([a-zA-Z] [a-zA-Z0-9_-]*)?
|
44
83
|
end
|
45
84
|
|
85
|
+
# Structure annotation contains the "type" and the "description" (both optional)
|
46
86
|
rule structure_annotation
|
47
87
|
structure_class description_section:(":" s description:[^\n]* "\n")?
|
48
88
|
{
|
89
|
+
# Return an array of two strings for the type and the description
|
49
90
|
def value
|
50
91
|
class_name = structure_class.text_value
|
51
92
|
description = nil
|
@@ -57,9 +98,16 @@ grammar CorrespondenceMarkupLanguage
|
|
57
98
|
}
|
58
99
|
end
|
59
100
|
|
101
|
+
# An item group is a sub-structure of a structure which contains a sequence of items and "non-items".
|
102
|
+
# An item group has an upper-case alphabetic ID (which should be unique within a structure,
|
103
|
+
# and which should be the ID of an item-group in the first structure of a structure group, but
|
104
|
+
# neither of these rules is required by the grammar).
|
105
|
+
# The item group ID is used as a default prefix for any item IDs that do not start with
|
106
|
+
# alphabetic characters (so a full item ID is always alphabetic followed by something else).
|
60
107
|
rule item_group
|
61
108
|
optional_id:(id:[A-Z]* ":")? components:(item / non_item)*
|
62
109
|
{
|
110
|
+
# Return an ItemGroup
|
63
111
|
def value
|
64
112
|
group_id = optional_id.elements ? optional_id.elements[0].text_value : ""
|
65
113
|
componentObjects = components.elements.map {|e| e.value(group_id)}
|
@@ -68,18 +116,37 @@ grammar CorrespondenceMarkupLanguage
|
|
68
116
|
}
|
69
117
|
end
|
70
118
|
|
119
|
+
# A "non-item" is textual content in an item group that is not part of an actual item.
|
120
|
+
# In effect this is text which is either not translatable to content in other structures
|
121
|
+
# in the same structure group, or, it is considered unimportant to identify its translation.
|
122
|
+
# For example, in the second case, punctuation in sentences, where translation is reasonably obvious, and we
|
123
|
+
# wish to highlight the translations of the actual words.
|
71
124
|
rule non_item
|
72
125
|
text:text
|
73
126
|
{
|
127
|
+
# Given the item group ID (as a default prefix for the item IDs, which is ignored for non-items),
|
128
|
+
# return a NonItem.
|
74
129
|
def value(group_id = "")
|
75
130
|
CorrespondenceMarkup::NonItem.new(text.value)
|
76
131
|
end
|
77
132
|
}
|
78
133
|
end
|
79
134
|
|
135
|
+
# An item is textual content with an ID, where different items in the same structure group
|
136
|
+
# with the same ID are considered to be related to each other.
|
137
|
+
# Typically, items with the same ID in the same structure are considered to be part of the
|
138
|
+
# "same item", and items with the same ID in different structures are considered to be
|
139
|
+
# translations of each other.
|
140
|
+
# Item IDs consist of an upper-case alphabetic prefix followed by a numeric ID.
|
141
|
+
# Any item ID that lacks an alphabetic prefix will have the item group ID of the containing
|
142
|
+
# item group added as a prefix to its ID.
|
143
|
+
# (This reflects the assumption that an item usually relates to items in item groups in other
|
144
|
+
# structures with the same item group ID, but occasionally an item relates to an item in
|
145
|
+
# some other item group in another structure.)
|
80
146
|
rule item
|
81
147
|
"[" id:item_ids S text:text "]"
|
82
148
|
{
|
149
|
+
# Given the item group ID (as a default prefix for the item IDs), return an Item
|
83
150
|
def value(group_id = "")
|
84
151
|
item_ids = id.text_value.split(",")
|
85
152
|
item_ids = item_ids.map { |item_id| item_id.match(/[A-Z]/) ? item_id : group_id + item_id}
|
@@ -88,27 +155,38 @@ grammar CorrespondenceMarkupLanguage
|
|
88
155
|
}
|
89
156
|
end
|
90
157
|
|
158
|
+
# Text is the textual component of both items and non-items.
|
159
|
+
# Text is delimited by "]", "[" and (at the beginning of items) whitespace.
|
160
|
+
# Text can include backslash-quoted characters, for example to include any of the delimiter characters.
|
91
161
|
rule text
|
92
162
|
(("\\" .) / (![\[\]\\] .))+
|
93
163
|
{
|
164
|
+
# Return the text, de-quoting any backslash-quoted characters.
|
94
165
|
def value
|
95
166
|
text_value.gsub(/\\(.)/, '\1')
|
96
167
|
end
|
97
168
|
}
|
98
169
|
end
|
99
170
|
|
171
|
+
# Items can actually have multiple IDs, in which case they are separated by commas
|
172
|
+
# (and no whitespace). If there are multiple IDs, the convention of applying the
|
173
|
+
# item group ID as a default prefix is applied individually to each ID.
|
174
|
+
# So, for example, "2,A2,3" in item group B would be expanded to "B2,A2,B3".
|
100
175
|
rule item_ids
|
101
176
|
item_id ("," item_id)*
|
102
177
|
end
|
103
178
|
|
179
|
+
# An item ID - optional upper-case alphabetic prefix, followed by a numeric ID.
|
104
180
|
rule item_id
|
105
181
|
[A-Z]* [0-9]+
|
106
182
|
end
|
107
183
|
|
184
|
+
# Rule for optional whitespace
|
108
185
|
rule s
|
109
186
|
[\s\n\r\t]*
|
110
187
|
end
|
111
188
|
|
189
|
+
# Rule for mandatory whitespace
|
112
190
|
rule S
|
113
191
|
[\s\n\r\t]+
|
114
192
|
end
|
@@ -1,17 +1,31 @@
|
|
1
1
|
|
2
2
|
require 'cgi'
|
3
3
|
|
4
|
+
# Module containing the Ruby classes that define the nodes of the AST
|
5
|
+
# created when parsing Correspondence-Markup language according to the
|
6
|
+
# grammar defined by *bracketed-grammar.treetop*.
|
7
|
+
# Each node class knows how to output itself as HTML as defined by the
|
8
|
+
# method *to_html*.
|
4
9
|
module CorrespondenceMarkup
|
5
|
-
|
10
|
+
|
11
|
+
# Helper functions used when generating HTML
|
6
12
|
module Helpers
|
7
13
|
|
8
14
|
# Either 1: a tag enclosed in "<" & ">", possibly missing the ">", or, 2: text outside a tag
|
9
15
|
TAGS_AND_TEXT_REGEX = /([<][^>]*[>]?)|([^<]+)/
|
10
16
|
|
17
|
+
# Split some HTML source into tags and plain text not in tags
|
18
|
+
# (so that the two can be processed differently, e.g. applying a transformation to text content
|
19
|
+
# where you don't want the transformation to apply to the internals of a directly-coded HTML tag)
|
11
20
|
def self.split_tags_and_text(html)
|
12
21
|
html.scan(TAGS_AND_TEXT_REGEX).to_a
|
13
22
|
end
|
14
23
|
|
24
|
+
# Convert text content into HTML according to various true/false options.
|
25
|
+
# Note: the text may contain HTML tags
|
26
|
+
# * escaped: if true, HTML-escape the text
|
27
|
+
# * br: if true, convert end-of-line characters to <br/> tags
|
28
|
+
# * nbsp: if true, convert all spaces in the text that is not in tags into non-breaking spaces (&nbsp;)
|
15
29
|
def text_to_html(text, options)
|
16
30
|
html = text
|
17
31
|
if options[:escaped]
|
@@ -31,88 +45,145 @@ module CorrespondenceMarkup
|
|
31
45
|
end
|
32
46
|
|
33
47
|
# An item is text in a structure with an associated id
|
48
|
+
# Typically it would be a word in a sentence. Items are to
|
49
|
+
# be related to other items in other structures in the same
|
50
|
+
# structure group that have the same ID (also to other items
|
51
|
+
# in the same structure with the same ID).
|
34
52
|
class Item
|
53
|
+
|
35
54
|
include Helpers
|
36
55
|
|
37
|
-
|
56
|
+
# The ID, which identifies the item (possibly not uniquely) within a given structure
|
57
|
+
attr_reader :id
|
38
58
|
|
59
|
+
# The text of the item
|
60
|
+
attr_reader :text
|
61
|
+
|
62
|
+
# Initialize from ID and text
|
39
63
|
def initialize(id, text)
|
40
64
|
@id = id
|
41
65
|
@text = text
|
42
66
|
end
|
43
67
|
|
68
|
+
# Is this an item? (yes)
|
44
69
|
def item?
|
45
70
|
true
|
46
71
|
end
|
47
72
|
|
73
|
+
# An item is equal to another item with the same ID and text
|
48
74
|
def ==(otherItem)
|
49
75
|
otherItem.class == Item && otherItem.id == @id && otherItem.text == @text
|
50
76
|
end
|
51
77
|
|
78
|
+
# Convert to HTML as a *<span>* element with *data-id* attribute set to the ID
|
79
|
+
# according to options for Helpers::text_to_html
|
52
80
|
def to_html(options={})
|
53
81
|
text_html = text_to_html(@text, options)
|
54
82
|
"<span data-id=\"#{@id}\">#{text_html}</span>"
|
55
83
|
end
|
56
84
|
end
|
57
85
|
|
58
|
-
# A non-item is text in a structure that is not an item - it
|
86
|
+
# A non-item is some text in a structure that is not an item - it will
|
87
|
+
# not be related to any other text.
|
59
88
|
class NonItem
|
60
89
|
include Helpers
|
61
90
|
|
91
|
+
# The text of the non-item
|
62
92
|
attr_reader :text
|
63
93
|
|
94
|
+
# Initialize from text
|
64
95
|
def initialize(text)
|
65
96
|
@text = text
|
66
97
|
end
|
67
98
|
|
99
|
+
# Is this an item? (no)
|
68
100
|
def item?
|
69
101
|
false
|
70
102
|
end
|
71
103
|
|
104
|
+
# A non-item is equal to another non-item with the same text
|
72
105
|
def ==(otherNonItem)
|
73
106
|
otherNonItem.class == NonItem && otherNonItem.text == @text
|
74
107
|
end
|
75
108
|
|
109
|
+
# Convert to HTML according to options for Helpers::text_to_html
|
76
110
|
def to_html(options={})
|
77
111
|
text_to_html(@text, options)
|
78
112
|
end
|
79
113
|
end
|
80
114
|
|
81
115
|
# A group of items & non-items that will form part of a structure
|
116
|
+
# Typically an item group is one line of items (words), or maybe
|
117
|
+
# two or three lines which naturally group together within the
|
118
|
+
# overall structure.
|
119
|
+
# Item groups with the same ID in different structures in the same
|
120
|
+
# structure group are related to each other, and may be shown next
|
121
|
+
# to each other in the UI when the "Interleave" option is chosen.
|
122
|
+
# (An "item group" could also be regarded as a "sub-structure".)
|
82
123
|
class ItemGroup
|
83
|
-
attr_reader :id, :content
|
84
124
|
|
125
|
+
# The ID which is unique in the structure. It identifies the
|
126
|
+
# item group uniquely within the structure. It also serves as a default
|
127
|
+
# prefix when parsing IDs for individual items.
|
128
|
+
attr_reader :id
|
129
|
+
|
130
|
+
# The array of items and non-items
|
131
|
+
attr_reader :content
|
132
|
+
|
133
|
+
# Initialize from ID and array of items and non-items
|
85
134
|
def initialize(id, content)
|
86
135
|
@id = id
|
87
136
|
@content = content
|
88
137
|
end
|
89
138
|
|
139
|
+
# An item group is equal to another item group with the same ID and the same content
|
90
140
|
def ==(otherItemGroup)
|
91
141
|
otherItemGroup.class == ItemGroup && otherItemGroup.id == @id && otherItemGroup.content == @content
|
92
142
|
end
|
93
143
|
|
144
|
+
# Convert to HTML as a *<div>* tag with class *item-group*, *data-group-id* attribute
|
145
|
+
# equal to the ID, and containing the HTML output for the content items and non-items
|
146
|
+
# (with those converted according to the options for Helpers::text_to_html).
|
94
147
|
def to_html(options={})
|
95
148
|
"<div class=\"item-group\" data-group-id=\"#{@id}\">\n " +
|
96
149
|
@content.map{|x| x.to_html(options)}.join("") + "\n</div>\n"
|
97
150
|
end
|
98
151
|
end
|
99
152
|
|
100
|
-
# A structure, containing a sequence of
|
153
|
+
# A structure, containing a sequence of item groups, as well as a type and a description.
|
154
|
+
# A structure will be one of two or more in a "structure group".
|
101
155
|
class Structure
|
102
|
-
attr_reader :type, :description, :item_groups
|
103
156
|
|
157
|
+
# A short alphanumeric name for the type, typically reflecting the "language" of a structure
|
158
|
+
# where different structures in a group are different language versions of the same information.
|
159
|
+
# It is used to determine a CSS class of the structure. E.g. "english". (It can be nil.)
|
160
|
+
attr_reader :type
|
161
|
+
|
162
|
+
# A textual description of the type which will be displayed in the UI. E.g. "English".
|
163
|
+
# Ideally it should be relatively concise. Can be nil.
|
164
|
+
attr_reader :description
|
165
|
+
|
166
|
+
# The array of item groups that make up the content of the structure.
|
167
|
+
attr_reader :item_groups
|
168
|
+
|
169
|
+
# Initialize from type, description and item groups
|
104
170
|
def initialize(type, description, item_groups)
|
105
171
|
@type = type
|
106
172
|
@description = description
|
107
173
|
@item_groups = item_groups
|
108
174
|
end
|
109
175
|
|
176
|
+
# A structure is equal to another structure with the same type, description and item groups
|
110
177
|
def ==(otherStructure)
|
111
178
|
otherStructure.class == Structure && otherStructure.type == @type &&
|
112
179
|
otherStructure.description == description &&
|
113
180
|
otherStructure.item_groups == @item_groups
|
114
181
|
end
|
115
182
|
|
183
|
+
# From the type, determine the CSS class names to be used in the *<div>* element created
|
184
|
+
# by to_html. If there is no type, then just "structure", otherwise, "structure <type>-structure",
|
185
|
+
# e.g. if the type is "english", then "structure english-structure".
|
186
|
+
# (The "-structure" suffix is used to reduce the chance of accidental CSS class name collisions.)
|
116
187
|
def css_class_names
|
117
188
|
class_names = "structure"
|
118
189
|
if @type != "" and @type != nil
|
@@ -121,6 +192,9 @@ module CorrespondenceMarkup
|
|
121
192
|
class_names
|
122
193
|
end
|
123
194
|
|
195
|
+
# Convert to HTML as a *<div>* with CSS class determined by *css_class_names*.
|
196
|
+
# Include a *<div>* of CSS class "language" (if the description is given)
|
197
|
+
# Include HTML for the item groups (converted according to the options for Helpers::text_to_html).
|
124
198
|
def to_html(options={})
|
125
199
|
itemGroupHtmls = @item_groups.map{|x| x.to_html(options)}
|
126
200
|
"<div class=\"#{css_class_names}\">\n " +
|
@@ -130,18 +204,31 @@ module CorrespondenceMarkup
|
|
130
204
|
end
|
131
205
|
|
132
206
|
end
|
133
|
-
|
207
|
+
|
208
|
+
# A structure group is a group of structures. Different structures in one structure group
|
209
|
+
# all represent the same information, but in different "languages". Items in different
|
210
|
+
# structures with the same item ID are shown in the UI as being translations of each other.
|
211
|
+
# (Items with the same ID in the same structure are also shown as related, and are presumed
|
212
|
+
# to be separated components of a single virtual item.)
|
134
213
|
class StructureGroup
|
214
|
+
|
215
|
+
# The array of structures
|
135
216
|
attr_reader :structures
|
136
217
|
|
218
|
+
# Initialize from the structures
|
137
219
|
def initialize(structures)
|
138
220
|
@structures = structures
|
139
221
|
end
|
140
222
|
|
223
|
+
# A structure group is equal to another structure group that has the same structures
|
141
224
|
def ==(otherStructureGroup)
|
142
225
|
otherStructureGroup.class == StructureGroup && otherStructureGroup.structures == @structures
|
143
226
|
end
|
144
227
|
|
228
|
+
# Convert to HTML as a *<div>* of CSS class "structure-group" that contains the HTML
|
229
|
+
# outputs from the structures.
|
230
|
+
# Options for Helpers::text_to_html can be provided as single true/false values, or as arrays
|
231
|
+
# of values, in which case the individual values are mapped to the corresponding structures.
|
145
232
|
def to_html(options={})
|
146
233
|
numStructures = structures.length
|
147
234
|
structureOptions = Array.new(numStructures)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: correspondence-markup
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.3.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-07-
|
12
|
+
date: 2013-07-14 00:00:00.000000000 Z
|
13
13
|
dependencies: []
|
14
14
|
description: Use to generate HTML pages containing structure groups, structures and
|
15
15
|
items as used by correspondence.js.
|
@@ -38,7 +38,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
38
38
|
version: '0'
|
39
39
|
segments:
|
40
40
|
- 0
|
41
|
-
hash: -
|
41
|
+
hash: -1042658257
|
42
42
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
43
43
|
none: false
|
44
44
|
requirements:
|