lexm 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 1d4d7c862b94b78fad36be7529b9a2ae2b7fc0ddcb7071703146ebbdb504576b
4
+ data.tar.gz: 7e5e2750d5b226fd6e6782c6e3337826d1d94a2771d4cada0ea2bf74c69dc2cd
5
+ SHA512:
6
+ metadata.gz: d91386b44fb2ad409d236a13e7685b3df4ebe6bc442de2826d488bbc927d2c28af1b6f6d3c52aabb64493f3a0b3b7e4d77951e43b022750041c5c7933a8d04fa
7
+ data.tar.gz: ae7d3b47fa6a7ff65e5fb1a125d257ad4aaf83fdf03c2d7cc803f98a0f5b0dbf76e1ce5df9de885e552e5d346e27e7c5a96dc8cbd0d3446f9a3de90668560c50
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Yanis Zafirópulos
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,132 @@
1
+ <div align="center">
2
+
3
+ <img align="center" width="400" src="icon.png"/>
4
+
5
+ ### Lemma Markup Format<br><br>![License](https://img.shields.io/github/license/drkameleon/lexm?style=for-the-badge)
6
+ </div>
7
+
8
+ ---
9
+
10
+ LexM is a concise, human-readable format for representing dictionary-ready lexical entries with their various forms, relationships, and redirections.
11
+
12
+ ## Installation
13
+
14
+ Add this line to your application's Gemfile:
15
+
16
+ ```ruby
17
+ gem 'lexm'
18
+ ```
19
+
20
+ And then execute:
21
+
22
+ ```bash
23
+ $ bundle install
24
+ ```
25
+
26
+ Or install it yourself as:
27
+
28
+ ```bash
29
+ $ gem install lexm
30
+ ```
31
+
32
+ ## Basic Format
33
+
34
+ A LexM entry consists of a lemma (headword) and optional elements:
35
+
36
+ ```
37
+ lemma[annotations]|sublemma1,sublemma2,>(relation)target
38
+ ```
39
+
40
+ ## Examples
41
+
42
+ ```ruby
43
+ require 'lexm'
44
+ include LexM
45
+
46
+ # Create a lemma with annotations
47
+ lemma = Lemma.new("rise[sp:rose,pp:risen]")
48
+ puts lemma
49
+ # => rise[sp:rose,pp:risen]
50
+
51
+ # Create a lemma with sublemmas
52
+ lemma = Lemma.new("abandon|abandoned,abandonment")
53
+ puts lemma
54
+ # => abandon|abandoned,abandonment
55
+
56
+ # Create a redirection entry
57
+ lemma = Lemma.new("better>>(cmp)good")
58
+ puts lemma
59
+ # => better>>(cmp)good
60
+
61
+ # Create a lemma with a redirection sublemma
62
+ lemma = Lemma.new("rose|>(sp)rise")
63
+ puts lemma
64
+ # => rose|>(sp)rise
65
+
66
+ # Build a lemma programmatically
67
+ lemma = Lemma.new
68
+ lemma.text = "run"
69
+ lemma.setAnnotations({"sp" => "ran", "pp" => "run"})
70
+ lemma.addSublemmas(["run away", "run up"])
71
+ puts lemma
72
+ # => run[sp:ran,pp:run]|run away,run up
73
+
74
+ # Work with a collection of lemmas
75
+ list = LemmaList.new
76
+ list.addLemma(Lemma.new("go[sp:went,pp:gone]|go about,go ahead"))
77
+ list.addLemma(Lemma.new("better>>(cmp)good"))
78
+
79
+ # Find lemmas that redirect to "good"
80
+ good_redirects = list.findRedirectionsTo("good")
81
+
82
+ # Iterate through all words
83
+ list.eachWord do |word|
84
+ puts "Word: #{word}"
85
+ end
86
+ ```
87
+
88
+ ## Entry Types
89
+
90
+ ### Standard Lemma
91
+
92
+ A standard dictionary entry with a headword and optional annotations:
93
+
94
+ ```
95
+ run[sp:ran,pp:run]
96
+ ```
97
+
98
+ ### Lemma with Sublemmas
99
+
100
+ A headword with related forms or expressions:
101
+
102
+ ```
103
+ abandon|abandoned,abandonment
104
+ ```
105
+
106
+ ### Redirection Entry
107
+
108
+ A pure redirection that points to another lemma:
109
+
110
+ ```
111
+ better>>(cmp)good
112
+ ```
113
+
114
+ ### Mixed Format
115
+
116
+ A lemma that has sublemmas including a redirection:
117
+
118
+ ```
119
+ left|left-handed,>(sp,pp)leave
120
+ ```
121
+
122
+ ## Attribution
123
+ LexM was created and developed by Yanis Zafirópulos (a.k.a. Dr.Kameleon). If you use this software, please maintain this attribution.
124
+
125
+ ### How to Cite
126
+ If you use LexM in your research or applications, please cite it as:
127
+
128
+ > Yanis Zafirópulos (2025). "LexM: Lemma Markup Format." GitHub repository: https://github.com/drkameleon/lexm
129
+
130
+ ## License
131
+
132
+ This library is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
data/bin/lexm ADDED
@@ -0,0 +1,99 @@
1
+ #!/usr/bin/env ruby
2
+ #############################################################
3
+ # LexM - Lemma Markup Format
4
+ #
5
+ # A specification for representing, dictionary-ready
6
+ # lexical entries and their relationships
7
+ #
8
+ # File: bin/lexm
9
+ # Author: Yanis Zafirópulos (aka Dr.Kameleon)
10
+ #############################################################
11
+
12
+ require "lexm"
13
+ require "optparse"
14
+
15
# Command-line front end for LexM files.
#
# Parses the options below, loads the given file into a LexM::LemmaList and
# prints the requested report. Exits with status 1 on any usage or
# processing error so the tool can be used reliably from scripts.

# Parse command line options
options = {}
parser = OptionParser.new do |opts|
  opts.banner = "Usage: lexm [options] [file]"

  opts.on("-v", "--version", "Show version") do
    puts "LexM version #{LexM::VERSION}"
    exit
  end

  opts.on("-h", "--help", "Show help") do
    puts opts
    exit
  end

  opts.on("-c", "--count", "Count lemmas in file") do
    options[:count] = true
  end

  opts.on("-l", "--list-words", "List all words (lemmas and sublemmas)") do
    options[:list_words] = true
  end

  opts.on("-r", "--redirects", "List all redirection lemmas") do
    options[:redirects] = true
  end

  opts.on("-t", "--to TARGET", "Find all lemmas redirecting to TARGET") do |target|
    options[:target] = target
  end

  # The error handler at the bottom consults options[:debug]; this flag is
  # what makes that backtrace reachable.
  opts.on("-d", "--debug", "Print a backtrace when processing fails") do
    options[:debug] = true
  end
end

begin
  parser.parse!
rescue OptionParser::ParseError => e
  # Unknown or incomplete options: report them instead of dumping a backtrace.
  warn e.message
  warn parser.help
  exit 1
end

# Process the file if provided
if ARGV.empty?
  warn "No file specified. Use --help for more information."
  exit 1
end

filename = ARGV[0]
unless File.exist?(filename)
  warn "File not found: #{filename}"
  exit 1
end

begin
  # The constant must be namespaced: this script never does `include LexM`,
  # so a bare `LemmaList` raises NameError.
  lemmas = LexM::LemmaList.new(filename)

  if options[:count]
    puts "Total lemmas: #{lemmas.size}"
    puts "Normal lemmas: #{lemmas.normalLemmas.size}"
    puts "Redirection lemmas: #{lemmas.redirectedLemmas.size}"
  elsif options[:list_words]
    puts "All words in file:"
    lemmas.allWords.sort.each do |word|
      puts "  #{word}"
    end
  elsif options[:redirects]
    puts "Redirection lemmas:"
    redirected = lemmas.redirectedLemmas
    if redirected.empty?
      puts "  None found"
    else
      redirected.each { |lemma| puts "  #{lemma}" }
    end
  elsif options[:target]
    target = options[:target]
    puts "Lemmas redirecting to '#{target}':"
    redirects = lemmas.findRedirectionsTo(target)
    if redirects.empty?
      puts "  None found"
    else
      redirects.each do |lemma|
        # A lemma either redirects as a whole, or through one or more of
        # its sublemmas.
        if lemma.redirect&.target == target
          puts "  #{lemma.text} -> #{target} (#{lemma.redirect.types.join(', ')})"
          next
        end

        lemma.sublemmas.each do |sublemma|
          next unless sublemma.redirect&.target == target

          puts "  #{lemma.text} -> #{target} [via sublemma #{sublemma.text || '(direct)'}] (#{sublemma.redirect.types.join(', ')})"
        end
      end
    end
  else
    # Default behavior: print a summary
    puts "Loaded #{lemmas.size} lemmas from #{filename}"
    puts "Use --help for more options"
  end
rescue => e
  warn "Error processing file: #{e.message}"
  warn e.backtrace.join("\n") if options[:debug]
  exit 1
end
data/lib/lexm/lemma.rb ADDED
@@ -0,0 +1,377 @@
1
+ #############################################################
2
+ # LexM - Lemma Markup Format
3
+ #
4
+ # A specification for representing, dictionary-ready
5
+ # lexical entries and their relationships
6
+ #
7
+ # File: lib/lexm/lemma.rb
8
+ # Author: Yanis Zafirópulos (aka Dr.Kameleon)
9
+ #############################################################
10
+
11
module LexM
  # Represents a lemma, the main entry in a lexicon.
  #
  # A lemma has a headword (+text+), optional annotations, and either a list
  # of sublemmas or a redirection to another lemma. The textual form is
  #
  #   lemma[annotations]|sublemma1,sublemma2,>(relation)target
  #
  # for a normal lemma and
  #
  #   lemma>>(relation)target
  #
  # for a pure redirection lemma.
  class Lemma
    # Separates sublemmas: a comma that is NOT inside a parenthesised
    # relation list, so that ">(sp,pp)leave" stays a single sublemma.
    SUBLEMMA_SEPARATOR = /,(?![^()]*\))/.freeze

    attr_accessor :text, :annotations, :sublemmas, :redirect

    # Initialize from either a string or direct components
    # @param input [String, nil] input string in LexM format to parse
    def initialize(input = nil)
      @text = nil
      @annotations = {}
      @sublemmas = []
      @redirect = nil

      parse(input) if input.is_a?(String)
    end

    # Parse a lemma string
    # @param input [String] lemma string in LexM format
    # @return [Lemma] self
    # @raise [RuntimeError] if the input is empty or malformed
    def parse(input)
      raise "Empty lemma input!" if input.nil? || input.strip.empty?

      # Cheap structural checks before any real parsing
      if input.count('[') != input.count(']')
        raise "Malformed input: mismatched brackets in '#{input}'"
      end

      if input.start_with?("|")
        raise "Malformed input: lemma starts with pipe character in '#{input}'"
      end

      # A double arrow anywhere makes the whole entry a redirection lemma
      if input.include?(">>")
        parseRedirectionLemma(input)
        return self
      end

      lemmaPart, sublemmasPart = input.split('|', 2)

      parseLemma(lemmaPart)
      parseSublemmas(sublemmasPart) if sublemmasPart

      self
    end

    # Parse a redirection lemma (with >> syntax)
    # @param input [String] redirection lemma string
    # @return [void]
    # @raise [RuntimeError] if the headword or the target is missing
    def parseRedirectionLemma(input)
      usage = "Malformed redirection syntax in '#{input}'. Should be 'word>>target' or 'word>>(relation)target'"

      # Needs a target after >>
      raise usage if input.match?(/>>[\s]*$/)

      if (m = input.match(/(.+?)>>\((.+?)\)(.+)/))
        @text = m[1].strip
        @redirect = buildRedirect(m[3], m[2])
      elsif (m = input.match(/(.+?)>>(.+)/))
        @text = m[1].strip
        target = m[2].strip
        if target.empty?
          raise "Malformed redirection syntax in '#{input}'. Missing target after '>>'"
        end
        @redirect = LemmaRedirect.new(target)
      else
        raise usage
      end
    end

    # Parse just the lemma part (before any pipe)
    # @param lemmaPart [String] lemma part string
    # @return [void]
    # @raise [RuntimeError] if the headword is empty or the annotation block is not closed
    def parseLemma(lemmaPart)
      unless lemmaPart.include?('[')
        # Simple lemma: just make sure there is actual text
        raise "Empty lemma text in '#{lemmaPart}'" if lemmaPart.strip.empty?

        @text = lemmaPart.strip
        return
      end

      baseLemma, annotationsPart = lemmaPart.split('[', 2)

      unless annotationsPart.end_with?(']')
        raise "Malformed annotation: missing closing ']' in '#{lemmaPart}'"
      end

      if baseLemma.strip.empty?
        raise "Missing lemma text before annotations in '#{lemmaPart}'"
      end

      @text = baseLemma.strip
      parseAnnotations(annotationsPart.chomp(']'))
    end

    # Parse sublemmas part (after the pipe)
    #
    # Entries are comma-separated, but commas inside a parenthesised relation
    # list (e.g. ">(sp,pp)leave") belong to that entry and do not split it.
    # @param sublemmasPart [String] sublemmas part string
    # @return [void]
    # @raise [RuntimeError] if a redirection sublemma has no target
    def parseSublemmas(sublemmasPart)
      sublemmasPart.split(SUBLEMMA_SEPARATOR).each do |entry|
        @sublemmas << parseSublemma(entry.strip)
      end
    end

    # Parse annotations like sp:past,pp:participle or pl:oxen
    # @param annotationsText [String] annotations string
    # @return [void]
    # @raise [RuntimeError] if the block, a type or a value is empty
    def parseAnnotations(annotationsText)
      raise "Empty annotations block" if annotationsText.strip.empty?

      annotationsText.split(',').each do |annotation|
        raise "Empty annotation in comma-separated list" if annotation.strip.empty?

        type, value = annotation.split(':', 2).map(&:strip)

        # No colon at all: a simple flag annotation
        if value.nil?
          @annotations[type] = true
          next
        end

        raise "Empty annotation type in '#{annotation}'" if type.empty?
        raise "Empty annotation value for type '#{type}'" if value.empty?

        @annotations[type] = value
      end
    end

    # Add a standard sublemma
    # @param text [String] text of the sublemma
    # @return [Lemma] self
    # @raise [RuntimeError] if this is a redirection lemma
    def addSublemma(text)
      raise "Cannot add sublemmas to a redirection lemma" if redirected?

      @sublemmas << Sublemma.new(text)
      self
    end

    # Add multiple sublemmas at once
    # @param texts [Array<String>] array of sublemma texts
    # @return [Lemma] self
    # @raise [RuntimeError] if this is a redirection lemma
    def addSublemmas(texts)
      raise "Cannot add sublemmas to a redirection lemma" if redirected?

      texts.each { |text| @sublemmas << Sublemma.new(text) }
      self
    end

    # Add a pure redirect sublemma
    # @param target [String] target to redirect to
    # @param types [Array<String>] relation types
    # @return [Lemma] self
    # @raise [RuntimeError] if this is a redirection lemma
    def addRedirect(target, types = [])
      raise "Cannot add sublemmas to a redirection lemma" if redirected?

      @sublemmas << Sublemma.new(nil, LemmaRedirect.new(target, types))
      self
    end

    # Set the lemma's redirection
    # @param target [String] target to redirect to
    # @param types [Array<String>] relation types
    # @return [Lemma] self
    # @raise [RuntimeError] if the lemma already has sublemmas
    def setRedirect(target, types = [])
      raise "Cannot set redirect on a lemma with sublemmas" unless @sublemmas.empty?

      @redirect = LemmaRedirect.new(target, types)
      self
    end

    # Validate annotation key and value format
    #
    # The key must consist solely of letters, digits and underscores; the
    # whole key is checked (\A..\z), so embedded newlines are rejected too.
    # String values must not contain square brackets, which would corrupt
    # the serialized "[...]" block.
    # @param key [String, Symbol] annotation key to validate
    # @param value [String, Boolean] annotation value to validate
    # @return [Boolean] true if validation passes
    # @raise [StandardError] with detailed message if validation fails
    def validateAnnotation(key, value)
      keyIsText = key.is_a?(String) || key.is_a?(Symbol)
      unless keyIsText && key.match?(/\A[a-zA-Z0-9_]+\z/)
        raise "Invalid annotation key: '#{key}' (must contain only letters, numbers, and underscores)"
      end

      if value.is_a?(String) && value.match?(/[\[\]]/)
        raise "Invalid annotation value for '#{key}': cannot contain square brackets"
      end

      true
    end

    # Set an annotation
    # @param type [String] annotation type
    # @param value [Object] annotation value
    # @return [Lemma] self
    # @raise [RuntimeError] if this is a redirection lemma or the pair is invalid
    def setAnnotation(type, value = true)
      raise "Cannot add annotations to a redirection lemma" if redirected?

      validateAnnotation(type, value)
      @annotations[type] = value
      self
    end

    # Add multiple annotations at once
    #
    # Every pair is validated like in #setAnnotation before anything is
    # stored, so a rejected pair leaves the lemma untouched.
    # @param annotations [Hash] hash of annotation type => value pairs
    # @return [Lemma] self
    # @raise [RuntimeError] if this is a redirection lemma or any pair is invalid
    def setAnnotations(annotations)
      raise "Cannot add annotations to a redirection lemma" if redirected?

      annotations.each { |key, value| validateAnnotation(key, value) }
      annotations.each { |key, value| @annotations[key] = value }
      self
    end

    # Clear all annotations
    # @return [Lemma] self
    def clearAnnotations
      @annotations = {}
      self
    end

    # Clear all sublemmas
    # @return [Lemma] self
    def clearSublemmas
      @sublemmas = []
      self
    end

    # Clear redirect
    # @return [Lemma] self
    def clearRedirect
      @redirect = nil
      self
    end

    # Clear all annotations and sublemmas but keep the main lemma
    # @return [Lemma] self
    def clear
      @annotations = {}
      @sublemmas = []
      @redirect = nil
      self
    end

    # Clear everything including the main lemma
    # @return [Lemma] self
    def clearAll
      @text = nil
      clear
    end

    # Is this a redirection lemma (no sublemmas, just a redirect)?
    # @return [Boolean] true if this is a redirection lemma
    def redirected?
      !@redirect.nil? && @sublemmas.empty?
    end

    # Convert to string format
    # @return [String] the string representation of this lemma
    def to_s
      # Redirection lemma format: the redirect renders with a single leading
      # '>', which is dropped here because the '>>' is written explicitly.
      return "#{@text}>>#{@redirect.to_s.sub('>', '')}" if redirected?
      return "" if @text.nil?

      result = @text.to_s.dup

      unless @annotations.empty?
        rendered = @annotations.map do |type, value|
          value == true ? type : "#{type}:#{value}"
        end
        result << "[#{rendered.join(',')}]"
      end

      unless @sublemmas.empty?
        result << "|" << @sublemmas.map(&:to_s).join(',')
      end

      result
    end

    private

    # Build a redirect from a raw target and a raw comma-separated type list.
    def buildRedirect(target, typesText)
      LemmaRedirect.new(target.strip, typesText.split(',').map(&:strip))
    end

    # Turn one (already stripped) sublemma entry into a Sublemma.
    def parseSublemma(entry)
      if entry.start_with?('>')
        # Pure redirection sublemma: >(relation)target or >target
        if (m = entry.match(/>\((.+?)\)(.+)/))
          Sublemma.new(nil, buildRedirect(m[2], m[1]))
        elsif (m = entry.match(/>(.+)/))
          Sublemma.new(nil, LemmaRedirect.new(m[1].strip))
        else
          raise "Malformed redirection sublemma '#{entry}': missing target after '>'"
        end
      elsif (m = entry.match(/(.+?)>\((.+?)\)(.+)/))
        # Textual sublemma with a typed redirection: word>(relation)target
        Sublemma.new(m[1].strip, buildRedirect(m[3], m[2]))
      elsif (m = entry.match(/(.+?)>(.+)/))
        # Textual sublemma with an untyped redirection: word>target
        Sublemma.new(m[1].strip, LemmaRedirect.new(m[2].strip))
      else
        # Simple sublemma
        Sublemma.new(entry)
      end
    end
  end
end
@@ -0,0 +1,263 @@
1
+ #############################################################
2
+ # LexM - Lemma Markup Format
3
+ #
4
+ # A specification for representing, dictionary-ready
5
+ # lexical entries and their relationships
6
+ #
7
+ # File: lib/lexm/lemma_list.rb
8
+ # Author: Yanis Zafirópulos (aka Dr.Kameleon)
9
+ #############################################################
10
+
11
module LexM
  # Represents a collection of lemmas.
  #
  # The list is Enumerable over its Lemma objects, so the usual collection
  # helpers (map, select, find, ...) are available next to the named finders
  # defined here.
  class LemmaList
    include Enumerable

    # The array of lemmas
    attr_reader :lemmas

    # Initialize a new lemma list, optionally from a string or file
    #
    # A string containing a newline is parsed as LexM text; any other string
    # is treated as the name of a file to read.
    # @param input [String, nil] input string or filename
    def initialize(input = nil)
      @lemmas = []
      return unless input.is_a?(String)

      if input.include?("\n")
        parseString(input)
      else
        parseFile(input)
      end
    end

    # Iterate through all lemmas
    # @yield [Lemma] each lemma in the list
    # @return [Array<Lemma>, Enumerator] the lemmas, or an enumerator when no block is given
    def each(&block)
      return enum_for(:each) unless block_given?

      @lemmas.each(&block)
    end

    # Iterate through all words (both lemmas and sublemmas)
    #
    # Lemmas and sublemmas without text (pure redirections) are skipped.
    # @yield [String] each word (lemma text or sublemma text)
    # @return [Enumerator] when no block is given
    def eachWord
      return enum_for(:eachWord) unless block_given?

      @lemmas.each do |lemma|
        yield lemma.text if lemma.text

        lemma.sublemmas.each do |sublemma|
          yield sublemma.text if sublemma.text
        end
      end
    end

    # Get an array of all words (both lemmas and sublemmas)
    # @return [Array<String>] array of all words
    def allWords
      eachWord.to_a
    end

    # Parse a multi-line string
    #
    # Blank lines and lines starting with '#' are ignored.
    # @param text [String] text to parse
    # @return [LemmaList] self
    def parseString(text)
      text.each_line do |raw|
        line = raw.strip
        next if line.empty? || line.start_with?('#')

        @lemmas << Lemma.new(line)
      end
      self
    end

    # Parse from a file
    #
    # Blank lines and lines starting with '#' are ignored. A line that fails
    # to parse is reported together with its line number.
    # @param filename [String] file to parse
    # @return [LemmaList] self
    # @raise [RuntimeError] if the file cannot be read or a line is malformed
    def parseFile(filename)
      File.open(filename, 'r') do |file|
        file.each_line.with_index(1) do |raw, line_number|
          line = raw.strip
          next if line.empty? || line.start_with?('#')

          begin
            @lemmas << Lemma.new(line)
          rescue StandardError => e
            raise "Error on line #{line_number}: #{e.message} (#{line})"
          end
        end
      end
      self
    rescue Errno::ENOENT
      raise "File not found: #{filename}"
    rescue Errno::EACCES
      raise "Permission denied: #{filename}"
    rescue StandardError => e
      raise "Error reading file: #{e.message}"
    end

    # Check for circular redirection chains
    # For example, if A redirects to B, which redirects back to A
    # @return [Boolean] true if no circular redirections are found
    # @raise [StandardError] with cycle path if circular redirections are detected
    def validateRedirections
      # Build a redirection graph: lemma text => target text
      redirection_map = {}
      @lemmas.each do |lemma|
        redirection_map[lemma.text] = lemma.redirect.target if lemma.redirected?
      end

      # Follow the chain from every redirecting lemma; a chain that comes
      # back to its own starting point is a cycle.
      redirection_map.each_key do |start|
        visited = []
        current = start

        while redirection_map.key?(current) && !visited.include?(current)
          visited << current
          current = redirection_map[current]
        end

        next unless current == start

        cycle_path = (visited + [current]).join(" -> ")
        raise "Circular redirection detected: #{cycle_path}"
      end

      true
    end

    # Validate the entire lemma list for consistency
    # Runs all validation checks
    # @return [Boolean] true if validation passes
    # @raise [StandardError] with detailed message if validation fails
    def validate
      validateRedirections
      true
    end

    # Find lemmas by lemma text
    # @param text [String] lemma text to search for
    # @return [Array<Lemma>] matching lemmas
    def findByText(text)
      @lemmas.select { |lemma| lemma.text == text }
    end

    # Find normal lemmas (not redirection lemmas)
    # @return [Array<Lemma>] normal lemmas
    def normalLemmas
      @lemmas.reject(&:redirected?)
    end

    # Find redirection lemmas
    # @return [Array<Lemma>] redirection lemmas
    def redirectedLemmas
      @lemmas.select(&:redirected?)
    end

    # Find lemmas that redirect to a given target, optionally filtered by type
    #
    # A lemma matches when it is itself a redirection to the target, or when
    # any of its sublemmas carries a redirect to the target. That includes
    # sublemmas that also have text ("word>(rel)target"), not only pure
    # redirection sublemmas.
    # @param target [String] target to search for
    # @param type [String, nil] optional relation type filter
    # @return [Array<Lemma>] matching lemmas
    def findRedirectionsTo(target, type = nil)
      hits = lambda do |redirect|
        !redirect.nil? &&
          redirect.target == target &&
          (type.nil? || redirect.types.include?(type))
      end

      @lemmas.select do |lemma|
        (lemma.redirected? && hits.call(lemma.redirect)) ||
          lemma.sublemmas.any? { |sublemma| hits.call(sublemma.redirect) }
      end
    end

    # Find lemmas by annotation
    # @param type [String] annotation type
    # @param value [Object, nil] optional value to match
    # @return [Array<Lemma>] matching lemmas
    def findByAnnotation(type, value = nil)
      @lemmas.select do |lemma|
        if value.nil?
          lemma.annotations.key?(type)
        else
          lemma.annotations[type] == value
        end
      end
    end

    # Add a new lemma
    # @param lemma [Lemma] lemma to add
    # @return [LemmaList] self
    def addLemma(lemma)
      @lemmas << lemma
      self
    end

    # Add multiple lemmas at once
    # @param lemmas [Array<Lemma>] lemmas to add
    # @return [LemmaList] self
    def addLemmas(lemmas)
      @lemmas.concat(lemmas)
      self
    end

    # Remove a lemma
    # @param lemma [Lemma] lemma to remove
    # @return [LemmaList] self
    def removeLemma(lemma)
      @lemmas.delete(lemma)
      self
    end

    # Clear all lemmas
    # @return [LemmaList] self
    def clear
      @lemmas = []
      self
    end

    # Get number of lemmas
    # @return [Integer] number of lemmas
    def size
      @lemmas.size
    end

    # Get lemma by index
    # @param index [Integer] index
    # @return [Lemma] lemma at index
    def [](index)
      @lemmas[index]
    end

    # Save to a file, one lemma per line
    # @param filename [String] file to save to
    # @return [void]
    # @raise [RuntimeError] if the file cannot be written
    def save(filename)
      File.open(filename, 'w') do |file|
        @lemmas.each { |lemma| file.puts(lemma.to_s) }
      end
    rescue Errno::EACCES
      raise "Permission denied: Cannot write to #{filename}"
    rescue StandardError => e
      raise "Error writing to file: #{e.message}"
    end

    # Convert to string
    # @return [String] string representation
    def to_s
      @lemmas.map(&:to_s).join("\n")
    end
  end
end
@@ -0,0 +1,36 @@
1
+ #############################################################
2
+ # LexM - Lemma Markup Format
3
+ #
4
+ # A specification for representing, dictionary-ready
5
+ # lexical entries and their relationships
6
+ #
7
+ # File: lib/lexm/lemma_redirect.rb
8
+ # Author: Yanis Zafirópulos (aka Dr.Kameleon)
9
+ #############################################################
10
+
11
module LexM
  # Represents a redirection to another lemma, with optional relation types
  class LemmaRedirect
    attr_accessor :target, :types

    # Initialize a new redirection
    #
    # +types+ is normalized with Array(): nil becomes an empty list and a
    # single relation given as a plain String becomes a one-element list.
    # @param target [String] the target lemma to redirect to
    # @param types [Array<String>, String, nil] relation types (e.g., ["pl"], ["sp", "pp"])
    def initialize(target, types = [])
      @target = target
      @types = Array(types)
    end

    # Convert to string representation
    #
    # Renders ">target" without relation types and ">(t1,t2)target" with them.
    # @return [String] the string representation of this redirection
    def to_s
      return ">#{@target}" if @types.empty?

      ">(#{@types.join(',')})#{@target}"
    end
  end
end
@@ -0,0 +1,40 @@
1
+ #############################################################
2
+ # LexM - Lemma Markup Format
3
+ #
4
+ # A specification for representing, dictionary-ready
5
+ # lexical entries and their relationships
6
+ #
7
+ # File: lib/lexm/sublemma.rb
8
+ # Author: Yanis Zafirópulos (aka Dr.Kameleon)
9
+ #############################################################
10
+
11
module LexM
  # Represents a sublemma, which can be a textual sublemma, a pure
  # redirection, or a textual sublemma that also redirects to another lemma
  class Sublemma
    attr_accessor :text, :redirect

    # Initialize a new sublemma
    # @param text [String, nil] the text of the sublemma (nil for pure redirections)
    # @param redirect [LemmaRedirect, nil] redirection information (nil for normal sublemmas)
    def initialize(text = nil, redirect = nil)
      @text = text
      @redirect = redirect
    end

    # Is this a pure redirection sublemma?
    # @return [Boolean] true if this is a pure redirection with no text
    def redirected?
      @text.nil? && !@redirect.nil?
    end

    # Convert to string representation
    #
    # A sublemma that has both text and a redirect renders as the text
    # followed by the redirect ("word>(rel)target"), so the redirect is not
    # lost when the sublemma is written back out. Always returns a String,
    # even when both parts are nil.
    # @return [String] the string representation of this sublemma
    def to_s
      return @text.to_s if @redirect.nil?

      "#{@text}#{@redirect}"
    end
  end
end
@@ -0,0 +1,13 @@
1
+ #############################################################
2
+ # LexM - Lemma Markup Format
3
+ #
4
+ # A specification for representing, dictionary-ready
5
+ # lexical entries and their relationships
6
+ #
7
+ # File: lib/lexm/version.rb
8
+ # Author: Yanis Zafirópulos (aka Dr.Kameleon)
9
+ #############################################################
10
+
11
module LexM
  # Version of the LexM gem. Frozen so that callers cannot mutate the
  # shared constant in place.
  VERSION = "0.2.0".freeze
end
data/lib/lexm.rb ADDED
@@ -0,0 +1,19 @@
1
+ #############################################################
2
+ # LexM - Lemma Markup Format
3
+ #
4
+ # A specification for representing, dictionary-ready
5
+ # lexical entries and their relationships
6
+ #
7
+ # File: lib/lexm.rb
8
+ # Author: Yanis Zafirópulos (aka Dr.Kameleon)
9
+ #############################################################
10
+
11
+ require 'lexm/version'
12
+ require 'lexm/lemma_redirect'
13
+ require 'lexm/sublemma'
14
+ require 'lexm/lemma'
15
+ require 'lexm/lemma_list'
16
+
17
+ module LexM
18
+
19
+ end
metadata ADDED
@@ -0,0 +1,85 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: lexm
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.0
5
+ platform: ruby
6
+ authors:
7
+ - Yanis Zafirópulos
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2025-03-20 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rspec
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '3.0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '3.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: yard
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '0.9'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '0.9'
41
+ description: A specification for representing, dictionary-ready lexical entries and
42
+ their relationships
43
+ email:
44
+ - drkameleon@gmail.com
45
+ executables:
46
+ - lexm
47
+ extensions: []
48
+ extra_rdoc_files: []
49
+ files:
50
+ - LICENSE
51
+ - README.md
52
+ - bin/lexm
53
+ - lib/lexm.rb
54
+ - lib/lexm/lemma.rb
55
+ - lib/lexm/lemma_list.rb
56
+ - lib/lexm/lemma_redirect.rb
57
+ - lib/lexm/sublemma.rb
58
+ - lib/lexm/version.rb
59
+ homepage: https://github.com/drkameleon/lexm
60
+ licenses:
61
+ - MIT
62
+ metadata:
63
+ homepage_uri: https://github.com/drkameleon/lexm
64
+ source_code_uri: https://github.com/drkameleon/lexm
65
+ changelog_uri: https://github.com/drkameleon/lexm/blob/main/CHANGELOG.md
66
+ post_install_message:
67
+ rdoc_options: []
68
+ require_paths:
69
+ - lib
70
+ required_ruby_version: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ version: 2.5.0
75
+ required_rubygems_version: !ruby/object:Gem::Requirement
76
+ requirements:
77
+ - - ">="
78
+ - !ruby/object:Gem::Version
79
+ version: '0'
80
+ requirements: []
81
+ rubygems_version: 3.4.10
82
+ signing_key:
83
+ specification_version: 4
84
+ summary: LexM - Lemma Markup Format
85
+ test_files: []