correspondence-markup 0.3.1 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,18 +5,18 @@ require 'correspondence-markup/bracketed-grammar'
|
|
5
5
|
|
6
6
|
module CorrespondenceMarkup
|
7
7
|
|
8
|
-
# Compiler
|
8
|
+
# Compiler that parses and compiles correspondence markup source code
|
9
9
|
# into an array of StructureGroup objects (from which HTML can be
|
10
|
-
# generated in the format required by correspondence.js)
|
10
|
+
# generated in the format required by correspondence.js).
|
11
11
|
class CorrespondenceMarkupCompiler
|
12
12
|
|
13
|
-
#
|
13
|
+
# Initialize by creating the CorrespondenceMarkupLanguageParser (defined by the Treetop source).
|
14
14
|
def initialize
|
15
15
|
@parser = CorrespondenceMarkupLanguageParser.new
|
16
16
|
end
|
17
17
|
|
18
|
-
#
|
19
|
-
# throwing an exception if there is a parse error
|
18
|
+
# Compile source code into an array of StructureGroup objects,
|
19
|
+
# throwing an exception if there is a parse error.
|
20
20
|
def compile_structure_groups(markup)
|
21
21
|
syntax_tree = @parser.parse(markup, root: :structure_groups)
|
22
22
|
if(syntax_tree.nil?)
|
@@ -1,9 +1,9 @@
|
|
1
1
|
require "correspondence-markup/types"
|
2
2
|
|
3
|
-
# Grammar for a markup language which can be compiled into HTML format
|
3
|
+
# Grammar for a markup language which can be compiled into the HTML format
|
4
4
|
# required by correspondence.js
|
5
5
|
|
6
|
-
# General note on bracketing of sequences
|
6
|
+
# General note on bracketing of sequences: different components are enclosed
|
7
7
|
# by different types of bracket, in particular:
|
8
8
|
#
|
9
9
|
# * item: "[]"
|
@@ -15,23 +15,30 @@ require "correspondence-markup/types"
|
|
15
15
|
# at which to edit components of particular content, the parsing of brackets
|
16
16
|
# is handled by the parent component, e.g. the "{}" bracketing of structures
|
17
17
|
# is specified in the grammar rule for structure_group.
|
18
|
-
#
|
18
|
+
# The motivation for this is: if a user is editing a structure definition in a UI text area,
|
19
19
|
# there should be no necessity for the user to enter the enclosing "{}" brackets,
|
20
20
|
# because the UI text area implicitly encloses the definition that the user is editing.
|
21
|
+
# So the software would want to parse the definition of a structure without the {}.
|
22
|
+
# (And if the user was editing each item group in a separate text field, then
|
23
|
+
# similarly the user would not want to include the outer "[]" brackets in each
|
24
|
+
# item group definition.)
|
21
25
|
|
22
26
|
grammar CorrespondenceMarkupLanguage
|
23
27
|
|
24
28
|
# Include the Module containing Ruby classes representing the AST nodes
|
25
29
|
include CorrespondenceMarkup
|
26
30
|
|
27
|
-
# This rule defines a sequence of structure groups.
|
31
|
+
# This rule defines a sequence of structure groups (intended to be displayed on one web page).
|
28
32
|
# Individual structure groups are independent of each other
|
29
|
-
# (although typically
|
30
|
-
# where each structure group has structures
|
33
|
+
# (although typically there will be a sequence of structure groups
|
34
|
+
# where each structure group has structures with the same sequence of languages,
|
35
|
+
# e.g. three structure groups of two structures each with languages
|
36
|
+
# spanish/english, spanish/english, spanish/english, representing the
|
37
|
+
# translations of three verses of a song).
|
31
38
|
rule structure_groups
|
32
39
|
s groups:("(" structure_group ")" s)*
|
33
40
|
{
|
34
|
-
# Return an array of StructureGroup
|
41
|
+
# Return an array of StructureGroup objects
|
35
42
|
def value
|
36
43
|
groups.elements.map {|e| e.structure_group.value}
|
37
44
|
end
|
@@ -60,10 +67,10 @@ grammar CorrespondenceMarkupLanguage
|
|
60
67
|
|
61
68
|
# A structure is one of two or more structures in a structure group
|
62
69
|
# (although the grammar does not impose any count restriction, in anticipation
|
63
|
-
# of application
|
70
|
+
# of application users editing and saving incomplete content).
|
64
71
|
# A structure has a "type" (short language description intended to map to a CSS class),
|
65
|
-
# a "description" (longer but still concise language description for display to the reader)
|
66
|
-
# and a sequence of "item groups".
|
72
|
+
# a "description" (longer but still concise language description for display to the reader),
|
73
|
+
# and contains a sequence of "item groups".
|
67
74
|
rule structure
|
68
75
|
structure_annotation s itemGroups:("[" item_group "]" s)*
|
69
76
|
{
|
@@ -119,8 +126,8 @@ grammar CorrespondenceMarkupLanguage
|
|
119
126
|
# A "non-item" is textual content in an item group that is not part of an actual item.
|
120
127
|
# In effect this is text which is either not translatable to content in other structures
|
121
128
|
# in the same structure group, or, it is considered unimportant to identify its translation.
|
122
|
-
# For example, in the second case, punctuation in sentences, where translation is reasonably
|
123
|
-
# wish to highlight the translations of the actual words.
|
129
|
+
# For example, in the second case, punctuation in sentences, where the translation is reasonably
|
130
|
+
# obvious, and we wish to highlight only the translations of the actual words.
|
124
131
|
rule non_item
|
125
132
|
text:text
|
126
133
|
{
|
@@ -137,7 +144,7 @@ grammar CorrespondenceMarkupLanguage
|
|
137
144
|
# Typically, items with the same ID in the same structure are considered to be part of the
|
138
145
|
# "same item", and items with the same ID in different structures are considered to be
|
139
146
|
# translations of each other.
|
140
|
-
#
|
147
|
+
# An item ID consists of an upper-case alphabetic prefix followed by a numeric ID.
|
141
148
|
# Any item ID that lacks an alphabetic prefix will have the item group ID of the containing
|
142
149
|
# item group added as a prefix to its ID.
|
143
150
|
# (This reflects the assumption that an item usually relates to items in item groups in other
|
@@ -168,7 +175,7 @@ grammar CorrespondenceMarkupLanguage
|
|
168
175
|
}
|
169
176
|
end
|
170
177
|
|
171
|
-
# Items can
|
178
|
+
# Items can have multiple IDs, in which case they are separated by commas
|
172
179
|
# (and no whitespace). If there are multiple IDs, the convention of applying the
|
173
180
|
# item group ID as a default prefix is applied individually to each ID.
|
174
181
|
# So, for example, "2,A2,3" in item group B would be expanded to "B2,A2,B3".
|
@@ -14,18 +14,21 @@ module CorrespondenceMarkup
|
|
14
14
|
# Either 1: a tag enclosed in "<" & ">", possibly missing the ">", or, 2: text outside a tag
|
15
15
|
TAGS_AND_TEXT_REGEX = /([<][^>]*[>]?)|([^<]+)/
|
16
16
|
|
17
|
-
# Split some HTML source into tags and plain text not in tags
|
18
|
-
# (so that the two can be processed differently, e.g. applying a transformation to text content
|
19
|
-
# where you don't want the transformation to apply to the internals of
|
17
|
+
# Split some HTML source into tags and plain text not in tags.
|
18
|
+
# (This is so that the two can be processed differently, e.g. applying a transformation to text content
|
19
|
+
# where you don't want the transformation to apply to the internals of directly-coded HTML tags.)
|
20
20
|
def self.split_tags_and_text(html)
|
21
21
|
html.scan(TAGS_AND_TEXT_REGEX).to_a
|
22
22
|
end
|
23
23
|
|
24
24
|
# Convert text content into HTML according to various true/false options.
|
25
25
|
# Note: the text may contain HTML tags
|
26
|
-
# * escaped
|
27
|
-
# * br
|
28
|
-
# * nbsp
|
26
|
+
# * :escaped - if true, HTML-escape the text
|
27
|
+
# * :br - if true, convert end-of-line characters to <br/> tags
|
28
|
+
# * :nbsp - if true, convert all spaces in the text that is not in tags into &nbsp;
|
29
|
+
# Of these options, *:escaped* only makes sense if you _don't_ want to include additional HTML
|
30
|
+
# markup in the content; *:br* and *:nbsp* make sense for programming languages but not for
|
31
|
+
# natural languages.
|
29
32
|
def text_to_html(text, options)
|
30
33
|
html = text
|
31
34
|
if options[:escaped]
|
@@ -44,22 +47,29 @@ module CorrespondenceMarkup
|
|
44
47
|
end
|
45
48
|
end
|
46
49
|
|
47
|
-
# An
|
50
|
+
# An Item is text in a structure with an associated ID.
|
48
51
|
# Typically it would be a word in a sentence. Items are to
|
49
52
|
# be related to other items in other structures in the same
|
50
|
-
# structure group that have the same ID
|
51
|
-
# in the same structure
|
53
|
+
# structure group that have the same ID.
|
54
|
+
# When two or more items in the same structure have the same ID,
|
55
|
+
# they are considered to be parts of the same item.
|
56
|
+
# (For example, in "I let it go", we might want to identify "let" and "go" as a single item,
|
57
|
+
# because they are part of an English phrasal verb "let go",
|
58
|
+
# and its meaning is not quite the sum of the meanings of those two component words.)
|
52
59
|
class Item
|
53
60
|
|
54
61
|
include Helpers
|
55
62
|
|
56
|
-
# The ID, which identifies the item (possibly not uniquely) within a given structure
|
63
|
+
# The ID, which identifies the item (possibly not uniquely) within a given structure.
|
64
|
+
# An ID can be a comma-separated string of multiple IDs (this is relevant for partial
|
65
|
+
# matching, and should only be used when there are more than two structures in a group
|
66
|
+
# and one of the structures has less granularity than other structures in that group).
|
57
67
|
attr_reader :id
|
58
68
|
|
59
|
-
# The text of the item
|
69
|
+
# The text of the item.
|
60
70
|
attr_reader :text
|
61
71
|
|
62
|
-
# Initialize from ID and text
|
72
|
+
# Initialize from ID and text.
|
63
73
|
def initialize(id, text)
|
64
74
|
@id = id
|
65
75
|
@text = text
|
@@ -71,6 +81,7 @@ module CorrespondenceMarkup
|
|
71
81
|
end
|
72
82
|
|
73
83
|
# An item is equal to another item with the same ID and text
|
84
|
+
# (equality is only used for testing)
|
74
85
|
def ==(otherItem)
|
75
86
|
otherItem.class == Item && otherItem.id == @id && otherItem.text == @text
|
76
87
|
end
|
@@ -102,6 +113,7 @@ module CorrespondenceMarkup
|
|
102
113
|
end
|
103
114
|
|
104
115
|
# A non-item is equal to another non-item with the same text
|
116
|
+
# (equality is only used for testing)
|
105
117
|
def ==(otherNonItem)
|
106
118
|
otherNonItem.class == NonItem && otherNonItem.text == @text
|
107
119
|
end
|
@@ -112,14 +124,15 @@ module CorrespondenceMarkup
|
|
112
124
|
end
|
113
125
|
end
|
114
126
|
|
115
|
-
# A group of items & non-items that will form part of a structure
|
116
|
-
# Typically an item group is one line of items (words), or maybe
|
127
|
+
# A group of items & non-items that will form part of a structure.
|
128
|
+
# Typically an item group is one line of items (i.e. words) and non-items, or maybe
|
117
129
|
# two or three lines which naturally group together within the
|
118
|
-
# overall structure
|
130
|
+
# overall structure (and which cannot be separated because they
|
131
|
+
# translate to a single line in one of the other structures in the
|
132
|
+
# same structure group).
|
119
133
|
# Item groups with the same ID in different structures in the same
|
120
|
-
# structure group related to each other, and may be shown next
|
134
|
+
# structure group are related to each other, and may be shown next
|
121
135
|
# to each other in the UI when the "Interleave" option is chosen.
|
122
|
-
# (An "item group" could also be regarded as a "sub-structure".)
|
123
136
|
class ItemGroup
|
124
137
|
|
125
138
|
# The ID which is unique in the structure. It identifies the
|
@@ -137,6 +150,7 @@ module CorrespondenceMarkup
|
|
137
150
|
end
|
138
151
|
|
139
152
|
# An item group is equal to another item group with the same IDs and the same content
|
153
|
+
# (equality is only used for testing)
|
140
154
|
def ==(otherItemGroup)
|
141
155
|
otherItemGroup.class == ItemGroup && otherItemGroup.id == @id && otherItemGroup.content == @content
|
142
156
|
end
|
@@ -174,6 +188,7 @@ module CorrespondenceMarkup
|
|
174
188
|
end
|
175
189
|
|
176
190
|
# A structure is equal to another structure with the same type, description and item groups
|
191
|
+
# (equality is only used for testing)
|
177
192
|
def ==(otherStructure)
|
178
193
|
otherStructure.class == Structure && otherStructure.type == @type &&
|
179
194
|
otherStructure.description == description &&
|
@@ -206,10 +221,10 @@ module CorrespondenceMarkup
|
|
206
221
|
end
|
207
222
|
|
208
223
|
# A structure group is a group of structures. Different structures in one structure group
|
209
|
-
# all represent the same information, but in different "languages". Items different
|
224
|
+
# all represent the same information, but in different "languages". Items in different
|
210
225
|
# structures with the same item ID are shown in the UI as being translations of each other.
|
211
|
-
# (Items with the same ID in the same structure are also
|
212
|
-
# to be
|
226
|
+
# (Items with the same ID in the same structure are also shown as related, and are presumed
|
227
|
+
# to be different parts of a single virtual item.)
|
213
228
|
class StructureGroup
|
214
229
|
|
215
230
|
# The array of structures
|
@@ -221,6 +236,7 @@ module CorrespondenceMarkup
|
|
221
236
|
end
|
222
237
|
|
223
238
|
# A structure group is equal to another structure group that has the same structures
|
239
|
+
# (equality is only used for testing)
|
224
240
|
def ==(otherStructureGroup)
|
225
241
|
otherStructureGroup.class == StructureGroup && otherStructureGroup.structures == @structures
|
226
242
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: correspondence-markup
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.1
|
4
|
+
version: 0.3.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -38,7 +38,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
38
38
|
version: '0'
|
39
39
|
segments:
|
40
40
|
- 0
|
41
|
-
hash:
|
41
|
+
hash: 617305043
|
42
42
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
43
43
|
none: false
|
44
44
|
requirements:
|