correspondence-markup 0.3.1 → 0.3.2
Sign up to get free protection for your applications and to get access to all the features.
@@ -5,18 +5,18 @@ require 'correspondence-markup/bracketed-grammar'
|
|
5
5
|
|
6
6
|
module CorrespondenceMarkup
|
7
7
|
|
8
|
-
# Compiler
|
8
|
+
# Compiler that parses and compiles correspondence markup source code
|
9
9
|
# into an array of StructureGroup objects (from which HTML can be
|
10
|
-
# generated in the format required by correspondence.js)
|
10
|
+
# generated in the format required by correspondence.js).
|
11
11
|
class CorrespondenceMarkupCompiler
|
12
12
|
|
13
|
-
#
|
13
|
+
# Initialize by creating the CorrespondenceMarkupLanguageParser (defined by the Treetop source).
|
14
14
|
def initialize
|
15
15
|
@parser = CorrespondenceMarkupLanguageParser.new
|
16
16
|
end
|
17
17
|
|
18
|
-
#
|
19
|
-
# throwing an exception if there is a parse error
|
18
|
+
# Compile source code into an array of StructureGroup objects,
|
19
|
+
# throwing an exception if there is a parse error.
|
20
20
|
def compile_structure_groups(markup)
|
21
21
|
syntax_tree = @parser.parse(markup, root: :structure_groups)
|
22
22
|
if(syntax_tree.nil?)
|
@@ -1,9 +1,9 @@
|
|
1
1
|
require "correspondence-markup/types"
|
2
2
|
|
3
|
-
# Grammar for a markup language which can be compiled into HTML format
|
3
|
+
# Grammar for a markup language which can be compiled into the HTML format
|
4
4
|
# required by correspondence.js
|
5
5
|
|
6
|
-
# General note on bracketing of sequences
|
6
|
+
# General note on bracketing of sequences: different components are enclosed
|
7
7
|
# by different types of bracket, in particular:
|
8
8
|
#
|
9
9
|
# * item: "[]"
|
@@ -15,23 +15,30 @@ require "correspondence-markup/types"
|
|
15
15
|
# at which to edit components of particular content, the parsing of brackets
|
16
16
|
# is handled by the parent component, e.g. the "{}" bracketing of structures
|
17
17
|
# is specified in the grammar rule for structure_group.
|
18
|
-
#
|
18
|
+
# The motivation for this is: if a user is editing a structure definition in a UI text area,
|
19
19
|
# there should be no necessity for the user to enter the enclosing "{}" brackets,
|
20
20
|
# because the UI text area implicitly encloses the definition that the user is editing.
|
21
|
+
# So the software would want to parse the definition of a structure without the {}.
|
22
|
+
# (And if the user was editing each item group in a separate text field, then
|
23
|
+
# similarly the user would not want to include the outer "[]" brackets in each
|
24
|
+
# item group definition.)
|
21
25
|
|
22
26
|
grammar CorrespondenceMarkupLanguage
|
23
27
|
|
24
28
|
# Include the Module containing Ruby classes representing the AST nodes
|
25
29
|
include CorrespondenceMarkup
|
26
30
|
|
27
|
-
# This rule defines a sequence of structure groups.
|
31
|
+
# This rule defines a sequence of structure groups (intended to be displayed on one web page).
|
28
32
|
# Individual structure groups are independent of each other
|
29
|
-
# (although typically
|
30
|
-
# where each structure group has structures
|
33
|
+
# (although typically a sequence of structure groups
|
34
|
+
# where each structure group has structures with the same sequence of languages,
|
35
|
+
# e.g. three structure groups of two structures each with languages
|
36
|
+
# spanish/english, spanish/english, spanish/english, representing the
|
37
|
+
# translations of three verses of a song).
|
31
38
|
rule structure_groups
|
32
39
|
s groups:("(" structure_group ")" s)*
|
33
40
|
{
|
34
|
-
# Return an array of StructureGroup
|
41
|
+
# Return an array of StructureGroup objects
|
35
42
|
def value
|
36
43
|
groups.elements.map {|e| e.structure_group.value}
|
37
44
|
end
|
@@ -60,10 +67,10 @@ grammar CorrespondenceMarkupLanguage
|
|
60
67
|
|
61
68
|
# A structure is one of two or more structures in a structure group
|
62
69
|
# (although the grammar does not impose any count restriction, in anticipation
|
63
|
-
# of application
|
70
|
+
# of application users editing and saving incomplete content).
|
64
71
|
# A structure has a "type" (short language description intended to map to a CSS class),
|
65
|
-
# a "description" (longer but still concise language description for display to the reader)
|
66
|
-
# and a sequence of "item groups".
|
72
|
+
# a "description" (longer but still concise language description for display to the reader),
|
73
|
+
# and contains a sequence of "item groups".
|
67
74
|
rule structure
|
68
75
|
structure_annotation s itemGroups:("[" item_group "]" s)*
|
69
76
|
{
|
@@ -119,8 +126,8 @@ grammar CorrespondenceMarkupLanguage
|
|
119
126
|
# A "non-item" is textual content in an item group that is not part of an actual item.
|
120
127
|
# In effect this is text which is either not translatable to content in other structures
|
121
128
|
# in the same structure group, or, it is considered unimportant to identify its translation.
|
122
|
-
# For example, in the second case, punctuation in sentences, where translation is reasonably
|
123
|
-
# wish to highlight the translations of the actual words.
|
129
|
+
# For example, in the second case, punctuation in sentences, where the translation is reasonably
|
130
|
+
# obvious, and we wish to highlight only the translations of the actual words.
|
124
131
|
rule non_item
|
125
132
|
text:text
|
126
133
|
{
|
@@ -137,7 +144,7 @@ grammar CorrespondenceMarkupLanguage
|
|
137
144
|
# Typically, items with the same ID in the same structure are considered to be part of the
|
138
145
|
# "same item", and items with the same ID in different structures are considered to be
|
139
146
|
# translations of each other.
|
140
|
-
#
|
147
|
+
# An item ID consists of an upper-case alphabetic prefix followed by a numeric ID.
|
141
148
|
# Any item ID that lacks an alphabetic prefix will have the item group ID of the containing
|
142
149
|
# item group added as a prefix to its ID.
|
143
150
|
# (This reflects the assumption that an item usually relates to items in item groups in other
|
@@ -168,7 +175,7 @@ grammar CorrespondenceMarkupLanguage
|
|
168
175
|
}
|
169
176
|
end
|
170
177
|
|
171
|
-
# Items can
|
178
|
+
# Items can have multiple IDs, in which case they are separated by commas
|
172
179
|
# (and no whitespace). If there are multiple IDs, the convention of applying the
|
173
180
|
# item group ID as a default prefix is applied individually to each ID.
|
174
181
|
# So, for example, "2,A2,3" in item group B would be expanded to "B2,A2,B3".
|
@@ -14,18 +14,21 @@ module CorrespondenceMarkup
|
|
14
14
|
# Either 1: a tag enclosed in "<" & ">", possibly missing the ">", or, 2: text outside a tag
|
15
15
|
TAGS_AND_TEXT_REGEX = /([<][^>]*[>]?)|([^<]+)/
|
16
16
|
|
17
|
-
# Split some HTML source into tags and plain text not in tags
|
18
|
-
# (so that the two can be processed differently, e.g. applying a transformation to text content
|
19
|
-
# where you don't want the transformation to apply to the internals of
|
17
|
+
# Split some HTML source into tags and plain text not in tags.
|
18
|
+
# (This allows the two to be processed differently, e.g. applying a transformation to text content
|
19
|
+
# where you don't want the transformation to apply to the internals of directly-coded HTML tags.)
|
20
20
|
def self.split_tags_and_text(html)
|
21
21
|
html.scan(TAGS_AND_TEXT_REGEX).to_a
|
22
22
|
end
|
23
23
|
|
24
24
|
# Convert text content into HTML according to various true/false options.
|
25
25
|
# Note: the text may contain HTML tags
|
26
|
-
# * escaped
|
27
|
-
# * br
|
28
|
-
# * nbsp
|
26
|
+
# * :escaped - if true, HTML-escape the text
|
27
|
+
# * :br - if true, convert end-of-line characters to <br/> tags
|
28
|
+
# * :nbsp - if true, convert all spaces in the text that is not in tags into &nbsp;
|
29
|
+
# Of these options, *:escaped* only makes sense if you _don't_ want to include additional HTML
|
30
|
+
# markup in the content; *:br* and *:nbsp* make sense for programming languages but not for
|
31
|
+
# natural languages.
|
29
32
|
def text_to_html(text, options)
|
30
33
|
html = text
|
31
34
|
if options[:escaped]
|
@@ -44,22 +47,29 @@ module CorrespondenceMarkup
|
|
44
47
|
end
|
45
48
|
end
|
46
49
|
|
47
|
-
# An
|
50
|
+
# An Item is text in a structure with an associated ID.
|
48
51
|
# Typically it would be a word in a sentence. Items are to
|
49
52
|
# be related to other items in other structures in the same
|
50
|
-
# structure group that have the same ID
|
51
|
-
# in the same structure
|
53
|
+
# structure group that have the same ID.
|
54
|
+
# When two or more items in the same structure have the same ID,
|
55
|
+
# they are considered to be parts of the same item.
|
56
|
+
# (For example, in "I let it go", we might want to identify "let" and "go" as a single item,
|
57
|
+
# because they are part of an English phrasal verb "let go",
|
58
|
+
# and its meaning is not quite the sum of the meanings of those two component words.)
|
52
59
|
class Item
|
53
60
|
|
54
61
|
include Helpers
|
55
62
|
|
56
|
-
# The ID, which identifies the item (possibly not uniquely) within a given structure
|
63
|
+
# The ID, which identifies the item (possibly not uniquely) within a given structure.
|
64
|
+
# An ID can be a comma-separated string of multiple IDs (this is relevant for partial
|
65
|
+
# matching, and should only be used when there are more than two structures in a group
|
66
|
+
# and one of the structures has less granularity than other structures in that group).
|
57
67
|
attr_reader :id
|
58
68
|
|
59
|
-
# The text of the item
|
69
|
+
# The text of the item.
|
60
70
|
attr_reader :text
|
61
71
|
|
62
|
-
# Initialize from ID and text
|
72
|
+
# Initialize from ID and text.
|
63
73
|
def initialize(id, text)
|
64
74
|
@id = id
|
65
75
|
@text = text
|
@@ -71,6 +81,7 @@ module CorrespondenceMarkup
|
|
71
81
|
end
|
72
82
|
|
73
83
|
# An item is equal to another item with the same ID and text
|
84
|
+
# (equality is only used for testing)
|
74
85
|
def ==(otherItem)
|
75
86
|
otherItem.class == Item && otherItem.id == @id && otherItem.text == @text
|
76
87
|
end
|
@@ -102,6 +113,7 @@ module CorrespondenceMarkup
|
|
102
113
|
end
|
103
114
|
|
104
115
|
# A non-item is equal to another non-item with the same text
|
116
|
+
# (equality is only used for testing)
|
105
117
|
def ==(otherNonItem)
|
106
118
|
otherNonItem.class == NonItem && otherNonItem.text == @text
|
107
119
|
end
|
@@ -112,14 +124,15 @@ module CorrespondenceMarkup
|
|
112
124
|
end
|
113
125
|
end
|
114
126
|
|
115
|
-
# A group of items & non-items that will form part of a structure
|
116
|
-
# Typically an item group is one line of items (words), or maybe
|
127
|
+
# A group of items & non-items that will form part of a structure.
|
128
|
+
# Typically an item group is one line of items (i.e. words) and non-items, or maybe
|
117
129
|
# two or three lines which naturally group together within the
|
118
|
-
# overall structure
|
130
|
+
# overall structure (and which cannot be separated because they
|
131
|
+
# translate to a single line in one of the other structures in the
|
132
|
+
# same structure group).
|
119
133
|
# Item groups with the same ID in different structures in the same
|
120
|
-
# structure group related to each other, and may be shown next
|
134
|
+
# structure group are related to each other, and may be shown next
|
121
135
|
# to each other in the UI when the "Interleave" option is chosen.
|
122
|
-
# (An "item group" could also be regarded as a "sub-structure".)
|
123
136
|
class ItemGroup
|
124
137
|
|
125
138
|
# The ID which is unique in the structure. It identifies the
|
@@ -137,6 +150,7 @@ module CorrespondenceMarkup
|
|
137
150
|
end
|
138
151
|
|
139
152
|
# An item group is equal to another item group with the same ID and the same content
|
153
|
+
# (equality is only used for testing)
|
140
154
|
def ==(otherItemGroup)
|
141
155
|
otherItemGroup.class == ItemGroup && otherItemGroup.id == @id && otherItemGroup.content == @content
|
142
156
|
end
|
@@ -174,6 +188,7 @@ module CorrespondenceMarkup
|
|
174
188
|
end
|
175
189
|
|
176
190
|
# A structure is equal to another structure with the same type, description and item groups
|
191
|
+
# (equality is only used for testing)
|
177
192
|
def ==(otherStructure)
|
178
193
|
otherStructure.class == Structure && otherStructure.type == @type &&
|
179
194
|
otherStructure.description == description &&
|
@@ -206,10 +221,10 @@ module CorrespondenceMarkup
|
|
206
221
|
end
|
207
222
|
|
208
223
|
# A structure group is a group of structures. Different structures in one structure group
|
209
|
-
# all represent the same information, but in different "languages". Items different
|
224
|
+
# all represent the same information, but in different "languages". Items in different
|
210
225
|
# structures with the same item ID are shown in the UI as being translations of each other.
|
211
|
-
# (Items with the same ID in the same structure are also
|
212
|
-
# to be
|
226
|
+
# (Items with the same ID in the same structure are also shown as related, and are presumed
|
227
|
+
# to be different parts of a single virtual item.)
|
213
228
|
class StructureGroup
|
214
229
|
|
215
230
|
# The array of structures
|
@@ -221,6 +236,7 @@ module CorrespondenceMarkup
|
|
221
236
|
end
|
222
237
|
|
223
238
|
# A structure group is equal to another structure group that has the same structures
|
239
|
+
# (equality is only used for testing)
|
224
240
|
def ==(otherStructureGroup)
|
225
241
|
otherStructureGroup.class == StructureGroup && otherStructureGroup.structures == @structures
|
226
242
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: correspondence-markup
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.1
|
4
|
+
version: 0.3.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -38,7 +38,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
38
38
|
version: '0'
|
39
39
|
segments:
|
40
40
|
- 0
|
41
|
-
hash:
|
41
|
+
hash: 617305043
|
42
42
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
43
43
|
none: false
|
44
44
|
requirements:
|