correspondence-markup 0.3.0 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -5,11 +5,18 @@ require 'correspondence-markup/bracketed-grammar'
5
5
 
6
6
  module CorrespondenceMarkup
7
7
 
8
+ # Compiler that parses and compiles correspondence markup source code
9
+ # into an array of StructureGroup objects (from which HTML can be
10
+ # generated in the format required by correspondence.js)
8
11
  class CorrespondenceMarkupCompiler
12
+
13
+ # initialize by creating the CorrespondenceMarkupLanguageParser (defined by the Treetop source)
9
14
  def initialize
10
15
  @parser = CorrespondenceMarkupLanguageParser.new
11
16
  end
12
17
 
18
+ # compile source code into an array of StructureGroups,
19
+ # throwing an exception if there is a parse error
13
20
  def compile_structure_groups(markup)
14
21
  syntax_tree = @parser.parse(markup, root: :structure_groups)
15
22
  if(syntax_tree.nil?)
@@ -19,8 +26,4 @@ module CorrespondenceMarkup
19
26
  end
20
27
  end
21
28
 
22
- def self.sayHello
23
- "hello"
24
- end
25
-
26
29
  end
@@ -1,21 +1,49 @@
1
1
  require "correspondence-markup/types"
2
2
 
3
+ # Grammar for a markup language which can be compiled into HTML format
4
+ # required by correspondence.js
5
+
6
+ # General note on bracketing of sequences - different components are enclosed
7
+ # by different types of bracket, in particular:
8
+ #
9
+ # * item: "[]"
10
+ # * item-group: "[]"
11
+ # * structure: "{}"
12
+ # * structure group: "()"
13
+ #
14
+ # However, in anticipation of a UI where the user may choose the granularity
15
+ # at which to edit components of particular content, the parsing of brackets
16
+ # is handled by the parent component, e.g. the "{}" bracketing of structures
17
+ # is specified in the grammar rule for structure_group.
18
+ # For example, if a user is editing a structure definition in a UI text area,
19
+ # there should be no necessity for the user to enter the enclosing "{}" brackets,
20
+ # because the UI text area implicitly encloses the definition that the user is editing.
21
+
3
22
  grammar CorrespondenceMarkupLanguage
4
23
 
24
+ # Include the Module containing Ruby classes representing the AST nodes
5
25
  include CorrespondenceMarkup
6
26
 
27
+ # This rule defines a sequence of structure groups.
28
+ # Individual structure groups are independent of each other
29
+ # (although typically they will be a sequence of structure groups
30
+ # where each structure group has structures in the same sequence of languages).
7
31
  rule structure_groups
8
32
  s groups:("(" structure_group ")" s)*
9
33
  {
34
+ # Return an array of StructureGroup objects
10
35
  def value
11
36
  groups.elements.map {|e| e.structure_group.value}
12
37
  end
13
38
  }
14
39
  end
15
40
 
41
+ # A structure group is a group of structures where each structure represents
42
+ # the same information in a different "language".
16
43
  rule structure_group
17
44
  s description:structure_group_description? s structures:("{" structure "}" s)*
18
45
  {
46
+ # Return a StructureGroup
19
47
  def value
20
48
  structureObjects = structures.elements.map {|e| e.structure.value}
21
49
  CorrespondenceMarkup::StructureGroup.new(structureObjects)
@@ -23,13 +51,23 @@ grammar CorrespondenceMarkupLanguage
23
51
  }
24
52
  end
25
53
 
54
+ # Optional lengthy description of a particular structure group.
55
+ # (Intended to be displayed as the title of the structure group
56
+ # describing the information presented in the structure group.)
26
57
  rule structure_group_description
27
58
  "#" s [^{\n]* "\n"
28
59
  end
29
60
 
61
+ # A structure is one of two or more structures in a structure group
62
+ # (although the grammar does not impose any count restriction, in anticipation
63
+ # of application users editing and saving incomplete content).
64
+ # A structure has a "type" (short language description intended to map to a CSS class),
65
+ # a "description" (longer but still concise language description for display to the reader)
66
+ # and a sequence of "item groups".
30
67
  rule structure
31
68
  structure_annotation s itemGroups:("[" item_group "]" s)*
32
69
  {
70
+ # Return a Structure
33
71
  def value
34
72
  itemGroupObjects = itemGroups.elements.map {|e| e.item_group.value}
35
73
  class_name, description = structure_annotation.value
@@ -38,14 +76,17 @@ grammar CorrespondenceMarkupLanguage
38
76
  end
39
77
  }
40
78
  end
41
-
79
+
80
+ # Structure class (for the structure's "type"), with rules similar to those of a CSS class identifier.
42
81
  rule structure_class
43
82
  ([a-zA-Z] [a-zA-Z0-9_-]*)?
44
83
  end
45
84
 
85
+ # Structure annotation contains the "type" and the "description" (both optional)
46
86
  rule structure_annotation
47
87
  structure_class description_section:(":" s description:[^\n]* "\n")?
48
88
  {
89
+ # Return an array of two strings for the type and the description
49
90
  def value
50
91
  class_name = structure_class.text_value
51
92
  description = nil
@@ -57,9 +98,16 @@ grammar CorrespondenceMarkupLanguage
57
98
  }
58
99
  end
59
100
 
101
+ # An item group is a sub-structure of a structure which contains a sequence of items and "non-items".
102
+ # An item group has an upper-case alphabetic ID (which should be unique within a structure,
103
+ # and which should be the ID of an item-group in the first structure of a structure group, but
104
+ # neither of these rules is required by the grammar).
105
+ # The item group ID is used as a default prefix for any item IDs that do not start with
106
+ # alphabetic characters (so a full item ID is always alphabetic followed by something else).
60
107
  rule item_group
61
108
  optional_id:(id:[A-Z]* ":")? components:(item / non_item)*
62
109
  {
110
+ # Return an ItemGroup
63
111
  def value
64
112
  group_id = optional_id.elements ? optional_id.elements[0].text_value : ""
65
113
  componentObjects = components.elements.map {|e| e.value(group_id)}
@@ -68,18 +116,37 @@ grammar CorrespondenceMarkupLanguage
68
116
  }
69
117
  end
70
118
 
119
+ # A "non-item" is textual content in an item group that is not part of an actual item.
120
+ # In effect this is text which is either not translatable to content in other structures
121
+ # in the same structure group, or, it is considered unimportant to identify its translation.
122
+ # For example, in the second case, punctuation in sentences, where translation is reasonably obvious, and we
123
+ # wish to highlight the translations of the actual words.
71
124
  rule non_item
72
125
  text:text
73
126
  {
127
+ # Given the item group ID (as a default prefix for the item IDs, which is ignored for non-items),
128
+ # return a NonItem.
74
129
  def value(group_id = "")
75
130
  CorrespondenceMarkup::NonItem.new(text.value)
76
131
  end
77
132
  }
78
133
  end
79
134
 
135
+ # An item is textual content with an ID, where different items in the same structure group
136
+ # with the same ID are considered to be related to each other.
137
+ # Typically, items with the same ID in the same structure are considered to be part of the
138
+ # "same item", and items with the same ID in different structures are considered to be
139
+ # translations of each other.
140
+ # Item IDs consist of an upper-case alphabetic prefix followed by a numeric ID.
141
+ # Any item ID that lacks an alphabetic prefix will have the item group ID of the containing
142
+ # item group added as a prefix to its ID.
143
+ # (This reflects the assumption that an item usually relates to items in item groups in other
144
+ # structures with the same item group ID, but occasionally an item relates to an item in
145
+ # some other item group in another structure.)
80
146
  rule item
81
147
  "[" id:item_ids S text:text "]"
82
148
  {
149
+ # Given the item group ID (as a default prefix for the item IDs), return an Item
83
150
  def value(group_id = "")
84
151
  item_ids = id.text_value.split(",")
85
152
  item_ids = item_ids.map { |item_id| item_id.match(/[A-Z]/) ? item_id : group_id + item_id}
@@ -88,27 +155,38 @@ grammar CorrespondenceMarkupLanguage
88
155
  }
89
156
  end
90
157
 
158
+ # Text is the textual component of both items and non-items.
159
+ # Text is delimited by "]", "[" and (at the beginning of items) whitespace.
160
+ # Text can include backslash-quoted characters, for example to include any of the delimiter characters.
91
161
  rule text
92
162
  (("\\" .) / (![\[\]\\] .))+
93
163
  {
164
+ # Return the text, de-quoting any backslash-quoted characters.
94
165
  def value
95
166
  text_value.gsub(/\\(.)/, '\1')
96
167
  end
97
168
  }
98
169
  end
99
170
 
171
+ # Items can actually have multiple IDs, in which case they are separated by commas
172
+ # (and no whitespace). If there are multiple IDs, the convention of applying the
173
+ # item group ID as a default prefix is applied individually to each ID.
174
+ # So, for example, "2,A2,3" in item group B would be expanded to "B2,A2,B3".
100
175
  rule item_ids
101
176
  item_id ("," item_id)*
102
177
  end
103
178
 
179
+ # An item ID - optional upper-case alphabetic prefix, followed by a numeric ID.
104
180
  rule item_id
105
181
  [A-Z]* [0-9]+
106
182
  end
107
183
 
184
+ # Rule for optional whitespace
108
185
  rule s
109
186
  [\s\n\r\t]*
110
187
  end
111
188
 
189
+ # Rule for mandatory whitespace
112
190
  rule S
113
191
  [\s\n\r\t]+
114
192
  end
@@ -1,17 +1,31 @@
1
1
 
2
2
  require 'cgi'
3
3
 
4
+ # Module containing the Ruby classes that define the nodes of the AST
5
+ # created when parsing Correspondence-Markup language according to the
6
+ # grammar defined by *bracketed-grammar.treetop*.
7
+ # Each node class knows how to output itself as HTML as defined by the
8
+ # method *to_html*.
4
9
  module CorrespondenceMarkup
5
-
10
+
11
+ # Helper functions used when generating HTML
6
12
  module Helpers
7
13
 
8
14
  # Either 1: a tag enclosed in "<" & ">", possibly missing the ">", or, 2: text outside a tag
9
15
  TAGS_AND_TEXT_REGEX = /([<][^>]*[>]?)|([^<]+)/
10
16
 
17
+ # Split some HTML source into tags and plain text not in tags
18
+ # (so that the two can be processed differently, e.g. applying a transformation to text content
19
+ # where you don't want the transformation to apply to the internals of a directly-coded HTML tag)
11
20
  def self.split_tags_and_text(html)
12
21
  html.scan(TAGS_AND_TEXT_REGEX).to_a
13
22
  end
14
23
 
24
+ # Convert text content into HTML according to various true/false options.
25
+ # Note: the text may contain HTML tags
26
+ # * escaped: if true, HTML-escape the text
27
+ # * br: if true, convert end-of-line characters to <br/> tags
28
+ # * nbsp: if true, convert all spaces in the text that is not in tags into &nbsp;
15
29
  def text_to_html(text, options)
16
30
  html = text
17
31
  if options[:escaped]
@@ -31,88 +45,145 @@ module CorrespondenceMarkup
31
45
  end
32
46
 
33
47
  # An item is text in a structure with an associated id
48
+ # Typically it would be a word in a sentence. Items are to
49
+ # be related to other items in other structures in the same
50
+ # structure group that have the same ID (also to other items
51
+ # in the same structure with the same ID).
34
52
  class Item
53
+
35
54
  include Helpers
36
55
 
37
- attr_reader :id, :text
56
+ # The ID, which identifies the item (possibly not uniquely) within a given structure
57
+ attr_reader :id
38
58
 
59
+ # The text of the item
60
+ attr_reader :text
61
+
62
+ # Initialize from ID and text
39
63
  def initialize(id, text)
40
64
  @id = id
41
65
  @text = text
42
66
  end
43
67
 
68
+ # Is this an item? (yes)
44
69
  def item?
45
70
  true
46
71
  end
47
72
 
73
+ # An item is equal to another item with the same ID and text
48
74
  def ==(otherItem)
49
75
  otherItem.class == Item && otherItem.id == @id && otherItem.text == @text
50
76
  end
51
77
 
78
+ # Convert to HTML as a *<span>* element with *data-id* attribute set to the ID
79
+ # according to options for Helpers::text_to_html
52
80
  def to_html(options={})
53
81
  text_html = text_to_html(@text, options)
54
82
  "<span data-id=\"#{@id}\">#{text_html}</span>"
55
83
  end
56
84
  end
57
85
 
58
- # A non-item is text in a structure that is not an item - it is just text
86
+ # A non-item is some text in a structure that is not an item - it will
87
+ # not be related to any other text.
59
88
  class NonItem
60
89
  include Helpers
61
90
 
91
+ # The text of the non-item
62
92
  attr_reader :text
63
93
 
94
+ # Initialize from text
64
95
  def initialize(text)
65
96
  @text = text
66
97
  end
67
98
 
99
+ # Is this an item? (no)
68
100
  def item?
69
101
  false
70
102
  end
71
103
 
104
+ # A non-item is equal to another non-item with the same text
72
105
  def ==(otherNonItem)
73
106
  otherNonItem.class == NonItem && otherNonItem.text == @text
74
107
  end
75
108
 
109
+ # Convert to HTML according to options for Helpers::text_to_html
76
110
  def to_html(options={})
77
111
  text_to_html(@text, options)
78
112
  end
79
113
  end
80
114
 
81
115
  # A group of items & non-items that will form part of a structure
116
+ # Typically an item group is one line of items (words), or maybe
117
+ # two or three lines which naturally group together within the
118
+ # overall structure.
119
+ # Item groups with the same ID in different structures in the same
120
+ # structure group are related to each other, and may be shown next
121
+ # to each other in the UI when the "Interleave" option is chosen.
122
+ # (An "item group" could also be regarded as a "sub-structure".)
82
123
  class ItemGroup
83
- attr_reader :id, :content
84
124
 
125
+ # The ID which is unique in the structure. It identifies the
126
+ # item group uniquely within the structure. It also serves as a default
127
+ # prefix when parsing IDs for individual items.
128
+ attr_reader :id
129
+
130
+ # The array of items and non-items
131
+ attr_reader :content
132
+
133
+ # Initialize from ID and array of items and non-items
85
134
  def initialize(id, content)
86
135
  @id = id
87
136
  @content = content
88
137
  end
89
138
 
139
+ # An item group is equal to another item group with the same ID and the same content
90
140
  def ==(otherItemGroup)
91
141
  otherItemGroup.class == ItemGroup && otherItemGroup.id == @id && otherItemGroup.content == @content
92
142
  end
93
143
 
144
+ # Convert to HTML as a *<div>* tag with class *item-group*, *data-group-id* attribute
145
+ # equal to the ID, and containing the HTML output for the content items and non-items
146
+ # (with those converted according to the options for Helpers::text_to_html).
94
147
  def to_html(options={})
95
148
  "<div class=\"item-group\" data-group-id=\"#{@id}\">\n " +
96
149
  @content.map{|x| x.to_html(options)}.join("") + "\n</div>\n"
97
150
  end
98
151
  end
99
152
 
100
- # A structure, containing a sequence of items and non-items
153
+ # A structure, containing a sequence of item groups, as well as a type and a description.
154
+ # A structure will be one of two or more in a "structure group".
101
155
  class Structure
102
- attr_reader :type, :description, :item_groups
103
156
 
157
+ # A short alphanumeric name for the type, typically reflecting the "language" of a structure
158
+ # where different structures in a group are different language versions of the same information.
159
+ # It is used to determine a CSS class of the structure. E.g. "english". (It can be nil.)
160
+ attr_reader :type
161
+
162
+ # A textual description of the type which will be displayed in the UI. E.g. "English".
163
+ # Ideally it should be relatively concise. Can be nil.
164
+ attr_reader :description
165
+
166
+ # The array of item groups that make up the content of the structure.
167
+ attr_reader :item_groups
168
+
169
+ # Initialize from type, description and item groups
104
170
  def initialize(type, description, item_groups)
105
171
  @type = type
106
172
  @description = description
107
173
  @item_groups = item_groups
108
174
  end
109
175
 
176
+ # A structure is equal to another structure with the same type, description and item groups
110
177
  def ==(otherStructure)
111
178
  otherStructure.class == Structure && otherStructure.type == @type &&
112
179
  otherStructure.description == description &&
113
180
  otherStructure.item_groups == @item_groups
114
181
  end
115
182
 
183
+ # From the type, determine the CSS class names to be used in the *<div>* element created
184
+ # by to_html. If there is no type, then just "structure", otherwise, "structure <type>-structure",
185
+ # e.g. if the type is "english", then "structure english-structure".
186
+ # (The "-structure" suffix is used to reduce the chance of accidental CSS class name collisions.)
116
187
  def css_class_names
117
188
  class_names = "structure"
118
189
  if @type != "" and @type != nil
@@ -121,6 +192,9 @@ module CorrespondenceMarkup
121
192
  class_names
122
193
  end
123
194
 
195
+ # Convert to HTML as a *<div>* with CSS class determined by *css_class_names*.
196
+ # Include a *<div>* of CSS class "language" (if the description is given)
197
+ # Include HTML for the item groups (converted according to the options for Helpers::text_to_html).
124
198
  def to_html(options={})
125
199
  itemGroupHtmls = @item_groups.map{|x| x.to_html(options)}
126
200
  "<div class=\"#{css_class_names}\">\n " +
@@ -130,18 +204,31 @@ module CorrespondenceMarkup
130
204
  end
131
205
 
132
206
  end
133
-
207
+
208
+ # A structure group is a group of structures. Different structures in one structure group
209
+ # all represent the same information, but in different "languages". Items in different
210
+ # structures with the same item ID are shown in the UI as being translations of each other.
211
+ # (Items with the same ID in the same structure are also shown as related, and are presumed
212
+ # to be separated components of a single virtual item.)
134
213
  class StructureGroup
214
+
215
+ # The array of structures
135
216
  attr_reader :structures
136
217
 
218
+ # Initialize from the structures
137
219
  def initialize(structures)
138
220
  @structures = structures
139
221
  end
140
222
 
223
+ # A structure group is equal to another structure group that has the same structures
141
224
  def ==(otherStructureGroup)
142
225
  otherStructureGroup.class == StructureGroup && otherStructureGroup.structures == @structures
143
226
  end
144
227
 
228
+ # Convert to HTML as a *<div>* of CSS class "structure-group" that contains the HTML
229
+ # outputs from the structures.
230
+ # Options for Helpers::text_to_html can be provided as a single true/false value, or, as arrays
231
+ # of values, in which case the individual values are mapped to the corresponding structures.
145
232
  def to_html(options={})
146
233
  numStructures = structures.length
147
234
  structureOptions = Array.new(numStructures)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: correspondence-markup
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.3.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-07-10 00:00:00.000000000 Z
12
+ date: 2013-07-14 00:00:00.000000000 Z
13
13
  dependencies: []
14
14
  description: Use to generate HTML pages containing structure groups, structures and
15
15
  items as used by correspondence.js.
@@ -38,7 +38,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
38
38
  version: '0'
39
39
  segments:
40
40
  - 0
41
- hash: -1049094467
41
+ hash: -1042658257
42
42
  required_rubygems_version: !ruby/object:Gem::Requirement
43
43
  none: false
44
44
  requirements: