correspondence-markup 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,11 +5,18 @@ require 'correspondence-markup/bracketed-grammar'
|
|
5
5
|
|
6
6
|
module CorrespondenceMarkup
|
7
7
|
|
8
|
+
# Compiler that parses and compiles correspondence markup source code
|
9
|
+
# into an array of StructureGroup objects (from which HTML can be
|
10
|
+
# generated in the format required by correspondence.js)
|
8
11
|
class CorrespondenceMarkupCompiler
|
12
|
+
|
13
|
+
# initialize by creating the CorrespondenceMarkupLanguageParser (defined by the Treetop source)
|
9
14
|
def initialize
|
10
15
|
@parser = CorrespondenceMarkupLanguageParser.new
|
11
16
|
end
|
12
17
|
|
18
|
+
# compile source code into an array of StructureGroups,
|
19
|
+
# throwing an exception if there is a parse error
|
13
20
|
def compile_structure_groups(markup)
|
14
21
|
syntax_tree = @parser.parse(markup, root: :structure_groups)
|
15
22
|
if(syntax_tree.nil?)
|
@@ -19,8 +26,4 @@ module CorrespondenceMarkup
|
|
19
26
|
end
|
20
27
|
end
|
21
28
|
|
22
|
-
def self.sayHello
|
23
|
-
"hello"
|
24
|
-
end
|
25
|
-
|
26
29
|
end
|
@@ -1,21 +1,49 @@
|
|
1
1
|
require "correspondence-markup/types"
|
2
2
|
|
3
|
+
# Grammar for a markup language which can be compiled into HTML format
|
4
|
+
# required by correspondence.js
|
5
|
+
|
6
|
+
# General note on bracketing of sequences - different components are enclosed
|
7
|
+
# by different types of bracket, in particular:
|
8
|
+
#
|
9
|
+
# * item: "[]"
|
10
|
+
# * item-group: "[]"
|
11
|
+
# * structure: "{}"
|
12
|
+
# * structure group: "()"
|
13
|
+
#
|
14
|
+
# However, in anticipation of a UI where the user may choose the granularity
|
15
|
+
# at which to edit components of particular content, the parsing of brackets
|
16
|
+
# is handled by the parent component, e.g. the "{}" bracketing of structures
|
17
|
+
# is specified in the grammar rule for structure_group.
|
18
|
+
# For example, if a user is editing a structure definition in a UI text area,
|
19
|
+
# there should be no necessity for the user to enter the enclosing "{}" brackets,
|
20
|
+
# because the UI text area implicitly encloses the definition that the user is editing.
|
21
|
+
|
3
22
|
grammar CorrespondenceMarkupLanguage
|
4
23
|
|
24
|
+
# Include the Module containing Ruby classes representing the AST nodes
|
5
25
|
include CorrespondenceMarkup
|
6
26
|
|
27
|
+
# This rule defines a sequence of structure groups.
|
28
|
+
# Individual structure groups are independent of each other
|
29
|
+
# (although typically they will be a sequence of structure groups
|
30
|
+
# where each structure group has structures in the same sequence of languages).
|
7
31
|
rule structure_groups
|
8
32
|
s groups:("(" structure_group ")" s)*
|
9
33
|
{
|
34
|
+
# Return an array of StructureGroup objects
|
10
35
|
def value
|
11
36
|
groups.elements.map {|e| e.structure_group.value}
|
12
37
|
end
|
13
38
|
}
|
14
39
|
end
|
15
40
|
|
41
|
+
# A structure group is a group of structures where each structure represents
|
42
|
+
# the same information in a different "language".
|
16
43
|
rule structure_group
|
17
44
|
s description:structure_group_description? s structures:("{" structure "}" s)*
|
18
45
|
{
|
46
|
+
# Return a StructureGroup
|
19
47
|
def value
|
20
48
|
structureObjects = structures.elements.map {|e| e.structure.value}
|
21
49
|
CorrespondenceMarkup::StructureGroup.new(structureObjects)
|
@@ -23,13 +51,23 @@ grammar CorrespondenceMarkupLanguage
|
|
23
51
|
}
|
24
52
|
end
|
25
53
|
|
54
|
+
# Optional lengthy description of a particular structure group.
|
55
|
+
# (Intended to be displayed as the title of the structure group
|
56
|
+
# describing the information presented in the structure group.)
|
26
57
|
rule structure_group_description
|
27
58
|
"#" s [^{\n]* "\n"
|
28
59
|
end
|
29
60
|
|
61
|
+
# A structure is one of two or more structures in a structure group
|
62
|
+
# (although the grammar does not impose any count restriction, in anticipation
|
63
|
+
# of application users editing and saving incomplete content).
|
64
|
+
# A structure has a "type" (short language description intended to map to a CSS class),
|
65
|
+
# a "description" (longer but still concise language description for display to the reader)
|
66
|
+
# and a sequence of "item groups".
|
30
67
|
rule structure
|
31
68
|
structure_annotation s itemGroups:("[" item_group "]" s)*
|
32
69
|
{
|
70
|
+
# Return a Structure
|
33
71
|
def value
|
34
72
|
itemGroupObjects = itemGroups.elements.map {|e| e.item_group.value}
|
35
73
|
class_name, description = structure_annotation.value
|
@@ -38,14 +76,17 @@ grammar CorrespondenceMarkupLanguage
|
|
38
76
|
end
|
39
77
|
}
|
40
78
|
end
|
41
|
-
|
79
|
+
|
80
|
+
# Structure class (for the structure's "type"), with rules similar to those of a CSS class identifier.
|
42
81
|
rule structure_class
|
43
82
|
([a-zA-Z] [a-zA-Z0-9_-]*)?
|
44
83
|
end
|
45
84
|
|
85
|
+
# Structure annotation contains the "type" and the "description" (both optional)
|
46
86
|
rule structure_annotation
|
47
87
|
structure_class description_section:(":" s description:[^\n]* "\n")?
|
48
88
|
{
|
89
|
+
# Return an array of two strings for the type and the description
|
49
90
|
def value
|
50
91
|
class_name = structure_class.text_value
|
51
92
|
description = nil
|
@@ -57,9 +98,16 @@ grammar CorrespondenceMarkupLanguage
|
|
57
98
|
}
|
58
99
|
end
|
59
100
|
|
101
|
+
# An item group is a sub-structure of a structure which contains a sequence of items and "non-items".
|
102
|
+
# An item group has an upper-case alphabetic ID (which should be unique within a structure,
|
103
|
+
# and which should be the ID of an item-group in the first structure of a structure group, but
|
104
|
+
# neither of these rules is required by the grammar).
|
105
|
+
# The item group ID is used as a default prefix for any item IDs that do not start with
|
106
|
+
# alphabetic characters (so a full item ID is always alphabetic followed by something else).
|
60
107
|
rule item_group
|
61
108
|
optional_id:(id:[A-Z]* ":")? components:(item / non_item)*
|
62
109
|
{
|
110
|
+
# Return an ItemGroup
|
63
111
|
def value
|
64
112
|
group_id = optional_id.elements ? optional_id.elements[0].text_value : ""
|
65
113
|
componentObjects = components.elements.map {|e| e.value(group_id)}
|
@@ -68,18 +116,37 @@ grammar CorrespondenceMarkupLanguage
|
|
68
116
|
}
|
69
117
|
end
|
70
118
|
|
119
|
+
# A "non-item" is textual content in an item group that is not part of an actual item.
|
120
|
+
# In effect this is text which is either not translatable to content in other structures
|
121
|
+
# in the same structure group, or, it is considered unimportant to identify its translation.
|
122
|
+
# For example, in the second case, punctuation in sentences, where translation is reasonably obvious, and we
|
123
|
+
# wish to highlight the translations of the actual words.
|
71
124
|
rule non_item
|
72
125
|
text:text
|
73
126
|
{
|
127
|
+
# Given the item group ID (as a default prefix for the item IDs, which is ignored for non-items),
|
128
|
+
# return a NonItem.
|
74
129
|
def value(group_id = "")
|
75
130
|
CorrespondenceMarkup::NonItem.new(text.value)
|
76
131
|
end
|
77
132
|
}
|
78
133
|
end
|
79
134
|
|
135
|
+
# An item is textual content with an ID, where different items in the same structure group
|
136
|
+
# with the same ID are considered to be related to each other.
|
137
|
+
# Typically, items with the same ID in the same structure are considered to be part of the
|
138
|
+
# "same item", and items with the same ID in different structures are considered to be
|
139
|
+
# translations of each other.
|
140
|
+
# Item IDs consist of an upper-case alphabetic prefix followed by a numeric ID.
|
141
|
+
# Any item ID that lacks an alphabetic prefix will have the item group ID of the containing
|
142
|
+
# item group added as a prefix to its ID.
|
143
|
+
# (This reflects the assumption that an item usually relates to items in item groups in other
|
144
|
+
# structures with the same item group ID, but occasionally an item relates to an item in
|
145
|
+
# some other item group in another structure.)
|
80
146
|
rule item
|
81
147
|
"[" id:item_ids S text:text "]"
|
82
148
|
{
|
149
|
+
# Given the item group ID (as a default prefix for the item IDs), return an Item
|
83
150
|
def value(group_id = "")
|
84
151
|
item_ids = id.text_value.split(",")
|
85
152
|
item_ids = item_ids.map { |item_id| item_id.match(/[A-Z]/) ? item_id : group_id + item_id}
|
@@ -88,27 +155,38 @@ grammar CorrespondenceMarkupLanguage
|
|
88
155
|
}
|
89
156
|
end
|
90
157
|
|
158
|
+
# Text is the textual component of both items and non-items.
|
159
|
+
# Text is delimited by "]", "[" and (at the beginning of items) whitespace.
|
160
|
+
# Text can include backslash-quoted characters, for example to include any of the delimiter characters.
|
91
161
|
rule text
|
92
162
|
(("\\" .) / (![\[\]\\] .))+
|
93
163
|
{
|
164
|
+
# Return the text, de-quoting any backslash-quoted characters.
|
94
165
|
def value
|
95
166
|
text_value.gsub(/\\(.)/, '\1')
|
96
167
|
end
|
97
168
|
}
|
98
169
|
end
|
99
170
|
|
171
|
+
# Items can actually have multiple IDs, in which case they are separated by commas
|
172
|
+
# (and no whitespace). If there are multiple IDs, the convention of applying the
|
173
|
+
# item group ID as a default prefix is applied individually to each ID.
|
174
|
+
# So, for example, "2,A2,3" in item group B would be expanded to "B2,A2,B3".
|
100
175
|
rule item_ids
|
101
176
|
item_id ("," item_id)*
|
102
177
|
end
|
103
178
|
|
179
|
+
# An item ID - optional upper-case alphabetic prefix, followed by a numeric ID.
|
104
180
|
rule item_id
|
105
181
|
[A-Z]* [0-9]+
|
106
182
|
end
|
107
183
|
|
184
|
+
# Rule for optional whitespace
|
108
185
|
rule s
|
109
186
|
[\s\n\r\t]*
|
110
187
|
end
|
111
188
|
|
189
|
+
# Rule for mandatory whitespace
|
112
190
|
rule S
|
113
191
|
[\s\n\r\t]+
|
114
192
|
end
|
@@ -1,17 +1,31 @@
|
|
1
1
|
|
2
2
|
require 'cgi'
|
3
3
|
|
4
|
+
# Module containing the Ruby classes that define the nodes of the AST
|
5
|
+
# created when parsing Correspondence-Markup language according to the
|
6
|
+
# grammar defined by *bracketed-grammar.treetop*.
|
7
|
+
# Each node class knows how to output itself as HTML as defined by the
|
8
|
+
# method *to_html*.
|
4
9
|
module CorrespondenceMarkup
|
5
|
-
|
10
|
+
|
11
|
+
# Helper functions used when generating HTML
|
6
12
|
module Helpers
|
7
13
|
|
8
14
|
# Either 1: a tag enclosed in "<" & ">", possibly missing the ">", or, 2: text outside a tag
|
9
15
|
TAGS_AND_TEXT_REGEX = /([<][^>]*[>]?)|([^<]+)/
|
10
16
|
|
17
|
+
# Split some HTML source into tags and plain text not in tags
|
18
|
+
# (so that the two can be processed differently, e.g. applying a transformation to text content
|
19
|
+
# where you don't want the transformation to apply to the internals of a directly-coded HTML tag)
|
11
20
|
def self.split_tags_and_text(html)
|
12
21
|
html.scan(TAGS_AND_TEXT_REGEX).to_a
|
13
22
|
end
|
14
23
|
|
24
|
+
# Convert text content into HTML according to various true/false options.
|
25
|
+
# Note: the text may contain HTML tags
|
26
|
+
# * escaped: if true, HTML-escape the text
|
27
|
+
# * br: if true, convert end-of-line characters to <br/> tags
|
28
|
+
# * nbsp: if true, convert all spaces in the text that is not in tags into non-breaking spaces (&nbsp;)
|
15
29
|
def text_to_html(text, options)
|
16
30
|
html = text
|
17
31
|
if options[:escaped]
|
@@ -31,88 +45,145 @@ module CorrespondenceMarkup
|
|
31
45
|
end
|
32
46
|
|
33
47
|
# An item is text in a structure with an associated id
|
48
|
+
# Typically it would be a word in a sentence. Items are to
|
49
|
+
# be related to other items in other structures in the same
|
50
|
+
# structure group that have the same ID (also to other items
|
51
|
+
# in the same structure with the same ID).
|
34
52
|
class Item
|
53
|
+
|
35
54
|
include Helpers
|
36
55
|
|
37
|
-
|
56
|
+
# The ID, which identifies the item (possibly not uniquely) within a given structure
|
57
|
+
attr_reader :id
|
38
58
|
|
59
|
+
# The text of the item
|
60
|
+
attr_reader :text
|
61
|
+
|
62
|
+
# Initialize from ID and text
|
39
63
|
def initialize(id, text)
|
40
64
|
@id = id
|
41
65
|
@text = text
|
42
66
|
end
|
43
67
|
|
68
|
+
# Is this an item? (yes)
|
44
69
|
def item?
|
45
70
|
true
|
46
71
|
end
|
47
72
|
|
73
|
+
# An item is equal to another item with the same ID and text
|
48
74
|
def ==(otherItem)
|
49
75
|
otherItem.class == Item && otherItem.id == @id && otherItem.text == @text
|
50
76
|
end
|
51
77
|
|
78
|
+
# Convert to HTML as a *<span>* element with *data-id* attribute set to the ID
|
79
|
+
# according to options for Helpers::text_to_html
|
52
80
|
def to_html(options={})
|
53
81
|
text_html = text_to_html(@text, options)
|
54
82
|
"<span data-id=\"#{@id}\">#{text_html}</span>"
|
55
83
|
end
|
56
84
|
end
|
57
85
|
|
58
|
-
# A non-item is text in a structure that is not an item - it
|
86
|
+
# A non-item is some text in a structure that is not an item - it will
|
87
|
+
# not be related to any other text.
|
59
88
|
class NonItem
|
60
89
|
include Helpers
|
61
90
|
|
91
|
+
# The text of the non-item
|
62
92
|
attr_reader :text
|
63
93
|
|
94
|
+
# Initialize from text
|
64
95
|
def initialize(text)
|
65
96
|
@text = text
|
66
97
|
end
|
67
98
|
|
99
|
+
# Is this an item? (no)
|
68
100
|
def item?
|
69
101
|
false
|
70
102
|
end
|
71
103
|
|
104
|
+
# A non-item is equal to another non-item with the same text
|
72
105
|
def ==(otherNonItem)
|
73
106
|
otherNonItem.class == NonItem && otherNonItem.text == @text
|
74
107
|
end
|
75
108
|
|
109
|
+
# Convert to HTML according to options for Helpers::text_to_html
|
76
110
|
def to_html(options={})
|
77
111
|
text_to_html(@text, options)
|
78
112
|
end
|
79
113
|
end
|
80
114
|
|
81
115
|
# A group of items & non-items that will form part of a structure
|
116
|
+
# Typically an item group is one line of items (words), or maybe
|
117
|
+
# two or three lines which naturally group together within the
|
118
|
+
# overall structure.
|
119
|
+
# Item groups with the same ID in different structures in the same
|
120
|
+
# structure group are related to each other, and may be shown next
|
121
|
+
# to each other in the UI when the "Interleave" option is chosen.
|
122
|
+
# (An "item group" could also be regarded as a "sub-structure".)
|
82
123
|
class ItemGroup
|
83
|
-
attr_reader :id, :content
|
84
124
|
|
125
|
+
# The ID which is unique in the structure. It identifies the
|
126
|
+
# item group uniquely within the structure. It also serves as a default
|
127
|
+
# prefix when parsing IDs for individual items.
|
128
|
+
attr_reader :id
|
129
|
+
|
130
|
+
# The array of items and non-items
|
131
|
+
attr_reader :content
|
132
|
+
|
133
|
+
# Initialize from ID and array of items and non-items
|
85
134
|
def initialize(id, content)
|
86
135
|
@id = id
|
87
136
|
@content = content
|
88
137
|
end
|
89
138
|
|
139
|
+
# An item group is equal to another item group with the same ID and the same content
|
90
140
|
def ==(otherItemGroup)
|
91
141
|
otherItemGroup.class == ItemGroup && otherItemGroup.id == @id && otherItemGroup.content == @content
|
92
142
|
end
|
93
143
|
|
144
|
+
# Convert to HTML as a *<div>* tag with class *item-group*, *data-group-id* attribute
|
145
|
+
# equal to the ID, and containing the HTML output for the content items and non-items
|
146
|
+
# (with those converted according to the options for Helpers::text_to_html).
|
94
147
|
def to_html(options={})
|
95
148
|
"<div class=\"item-group\" data-group-id=\"#{@id}\">\n " +
|
96
149
|
@content.map{|x| x.to_html(options)}.join("") + "\n</div>\n"
|
97
150
|
end
|
98
151
|
end
|
99
152
|
|
100
|
-
# A structure, containing a sequence of
|
153
|
+
# A structure, containing a sequence of item groups, as well as a type and a description.
|
154
|
+
# A structure will be one of two or more in a "structure group".
|
101
155
|
class Structure
|
102
|
-
attr_reader :type, :description, :item_groups
|
103
156
|
|
157
|
+
# A short alphanumeric name for the type, typically reflecting the "language" of a structure
|
158
|
+
# where different structures in a group are different language versions of the same information.
|
159
|
+
# It is used to determine a CSS class of the structure. E.g. "english". (It can be nil.)
|
160
|
+
attr_reader :type
|
161
|
+
|
162
|
+
# A textual description of the type which will be displayed in the UI. E.g. "English".
|
163
|
+
# Ideally it should be relatively concise. Can be nil.
|
164
|
+
attr_reader :description
|
165
|
+
|
166
|
+
# The array of item groups that make up the content of the structure.
|
167
|
+
attr_reader :item_groups
|
168
|
+
|
169
|
+
# Initialize from type, description and item groups
|
104
170
|
def initialize(type, description, item_groups)
|
105
171
|
@type = type
|
106
172
|
@description = description
|
107
173
|
@item_groups = item_groups
|
108
174
|
end
|
109
175
|
|
176
|
+
# A structure is equal to another structure with the same type, description and item groups
|
110
177
|
def ==(otherStructure)
|
111
178
|
otherStructure.class == Structure && otherStructure.type == @type &&
|
112
179
|
otherStructure.description == description &&
|
113
180
|
otherStructure.item_groups == @item_groups
|
114
181
|
end
|
115
182
|
|
183
|
+
# From the type, determine the CSS class names to be used in the *<div>* element created
|
184
|
+
# by to_html. If there is no type, then just "structure", otherwise, "structure <type>-structure",
|
185
|
+
# e.g. if the type is "english", then "structure english-structure".
|
186
|
+
# (The "-structure" suffix is used to reduce the chance of accidental CSS class name collisions.)
|
116
187
|
def css_class_names
|
117
188
|
class_names = "structure"
|
118
189
|
if @type != "" and @type != nil
|
@@ -121,6 +192,9 @@ module CorrespondenceMarkup
|
|
121
192
|
class_names
|
122
193
|
end
|
123
194
|
|
195
|
+
# Convert to HTML as a *<div>* with CSS class determined by *css_class_names*.
|
196
|
+
# Include a *<div>* of CSS class "language" (if the description is given)
|
197
|
+
# Include HTML for the item groups (converted according to the options for Helpers::text_to_html).
|
124
198
|
def to_html(options={})
|
125
199
|
itemGroupHtmls = @item_groups.map{|x| x.to_html(options)}
|
126
200
|
"<div class=\"#{css_class_names}\">\n " +
|
@@ -130,18 +204,31 @@ module CorrespondenceMarkup
|
|
130
204
|
end
|
131
205
|
|
132
206
|
end
|
133
|
-
|
207
|
+
|
208
|
+
# A structure group is a group of structures. Different structures in one structure group
|
209
|
+
# all represent the same information, but in different "languages". Items in different
|
210
|
+
# structures with the same item ID are shown in the UI as being translations of each other.
|
211
|
+
# (Items with the same ID in the same structure are also shown as related, and are presumed
|
212
|
+
# to be separated components of a single virtual item.)
|
134
213
|
class StructureGroup
|
214
|
+
|
215
|
+
# The array of structures
|
135
216
|
attr_reader :structures
|
136
217
|
|
218
|
+
# Initialize from the structures
|
137
219
|
def initialize(structures)
|
138
220
|
@structures = structures
|
139
221
|
end
|
140
222
|
|
223
|
+
# A structure group is equal to another structure group that has the same structures
|
141
224
|
def ==(otherStructureGroup)
|
142
225
|
otherStructureGroup.class == StructureGroup && otherStructureGroup.structures == @structures
|
143
226
|
end
|
144
227
|
|
228
|
+
# Convert to HTML as a *<div>* of CSS class "structure-group" that contains the HTML
|
229
|
+
# outputs from the structures.
|
230
|
+
# Options for Helpers::text_to_html can be provided as a single true/false value, or as arrays
|
231
|
+
# of values, in which case the individual values are mapped to the corresponding structures.
|
145
232
|
def to_html(options={})
|
146
233
|
numStructures = structures.length
|
147
234
|
structureOptions = Array.new(numStructures)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: correspondence-markup
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-07-
|
12
|
+
date: 2013-07-14 00:00:00.000000000 Z
|
13
13
|
dependencies: []
|
14
14
|
description: Use to generate HTML pages containing structure groups, structures and
|
15
15
|
items as used by correspondence.js.
|
@@ -38,7 +38,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
38
38
|
version: '0'
|
39
39
|
segments:
|
40
40
|
- 0
|
41
|
-
hash: -
|
41
|
+
hash: -1042658257
|
42
42
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
43
43
|
none: false
|
44
44
|
requirements:
|