lexm 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 147eccae18795425b54c13045269798e8c438523c5453279c5274fc44a95fc65
4
- data.tar.gz: b5055ec1bb29595732129402875e58f79c9d2440053624a88dec447f8e82752f
3
+ metadata.gz: c74656fda529cd304f92c513c537445e2e492cab160fb2397905e3e00f9fe059
4
+ data.tar.gz: fdc730362c3dd42a991ba11fdd96ef5d3ce60cb5332e1d85d4c342409cadc364
5
5
  SHA512:
6
- metadata.gz: 6144cfd7f2eb44f7ef4a4f134925a39ff2c9a675f5176de4c09f17d02f8805aac7ba71ed5a6c81fbf48435cf99e11b4e0b912c263e3759c880a8c655983b902b
7
- data.tar.gz: '0479a14b133a3437314bce00e2d12ae9fedd45c5c69170ff76ee0b35338aaf43d2b5efa22462b240fa2c7c1c0a06d0b6255ec7d8ea53c0156674bdf9f46c59ce'
6
+ metadata.gz: 6404296f5a2d1ea6791550e6a87dd8a95e009bdb1bef2fb7aaabb7f6204330243ff1ce0275b004b07a3f2a53e4b07cb680b35c074a3a82cebcff1507581a2328
7
+ data.tar.gz: a7c7125edb88bfeba32d952a6eabbde18c167755b2f074e562bdd33fc30a9cd073cd4848bfc3ba232515e7791aed4f0c60692fe31442fdd3f9c299ba65e8c3fb
data/lib/lexm/lemma.rb CHANGED
@@ -119,57 +119,75 @@ module LexM
119
119
  # @param sublemmasPart [String] sublemmas part string
120
120
  # @return [void]
121
121
  def parseSublemmas(sublemmasPart)
122
- # Check if the sublemma part starts with a redirection marker
123
- if sublemmasPart.start_with?('>')
124
- # This is a case where the lemma has a pure redirection sublemma
125
- # Format: word|>(relation)target
126
- if sublemmasPart =~ />\((.+?)\)(.+)/
127
- redirect = LemmaRedirect.new($2.strip, $1.split(',').map(&:strip))
128
- @sublemmas << Sublemma.new(nil, redirect)
129
- elsif sublemmasPart =~ />(.+)/
130
- redirect = LemmaRedirect.new($1.strip)
131
- @sublemmas << Sublemma.new(nil, redirect)
132
- end
133
- else
134
- # Split the sublemmas and process each one
135
- sublemmas = sublemmasPart.split(',')
122
+ # We need a smarter way to split sublemmas that respects parentheses
123
+ # This helps us correctly handle cases like ">(sp,pp)wring,abc"
124
+ sublemmas = smart_split_sublemmas(sublemmasPart)
125
+
126
+ # Process each sublemma
127
+ sublemmas.each do |sublemma|
128
+ sublemma = sublemma.strip
136
129
 
137
- # Process normal sublemmas
138
- sublemmas.each do |sublemma|
139
- sublemma = sublemma.strip
140
-
141
- # Handle pure redirection sublemma
142
- if sublemma.start_with?('>')
143
- if sublemma =~ />\((.+?)\)(.+)/
144
- redirect = LemmaRedirect.new($2.strip, $1.split(',').map(&:strip))
145
- @sublemmas << Sublemma.new(nil, redirect)
146
- elsif sublemma =~ />(.+)/
147
- redirect = LemmaRedirect.new($1.strip)
148
- @sublemmas << Sublemma.new(nil, redirect)
149
- end
150
- # Handle normal sublemma with possible redirection
151
- elsif sublemma.include?('>')
152
- if sublemma =~ /(.+?)>\((.+?)\)(.+)/
153
- # Format: word>(relation)target
154
- text = $1.strip
155
- redirect = LemmaRedirect.new($3.strip, $2.split(',').map(&:strip))
156
- @sublemmas << Sublemma.new(text, redirect)
157
- elsif sublemma =~ /(.+?)>(.+)/
158
- # Simple redirection without relation type
159
- text = $1.strip
160
- redirect = LemmaRedirect.new($2.strip)
161
- @sublemmas << Sublemma.new(text, redirect)
162
- else
163
- @sublemmas << Sublemma.new(sublemma)
164
- end
130
+ # Handle pure redirection sublemma (starts with >)
131
+ if sublemma.start_with?('>')
132
+ if sublemma =~ />\((.+?)\)(.+)/
133
+ redirect = LemmaRedirect.new($2.strip, $1.split(',').map(&:strip))
134
+ @sublemmas << Sublemma.new(nil, redirect, self)
135
+ elsif sublemma =~ />(.+)/
136
+ redirect = LemmaRedirect.new($1.strip)
137
+ @sublemmas << Sublemma.new(nil, redirect, self)
138
+ end
139
+ # Handle normal sublemma with possible redirection
140
+ elsif sublemma.include?('>')
141
+ # Check for a redirection with relation types
142
+ if sublemma =~ /(.+?)>\((.+?)\)(.+)/
143
+ # Format: word>(relation)target
144
+ text = $1.strip
145
+ redirect = LemmaRedirect.new($3.strip, $2.split(',').map(&:strip))
146
+ @sublemmas << Sublemma.new(text, redirect, self)
147
+ elsif sublemma =~ /(.+?)>(.+)/
148
+ # Simple redirection without relation type
149
+ text = $1.strip
150
+ redirect = LemmaRedirect.new($2.strip)
151
+ @sublemmas << Sublemma.new(text, redirect, self)
165
152
  else
166
- # Simple sublemma
167
- @sublemmas << Sublemma.new(sublemma)
153
+ @sublemmas << Sublemma.new(sublemma, nil, self)
168
154
  end
155
+ else
156
+ # Simple sublemma
157
+ @sublemmas << Sublemma.new(sublemma, nil, self)
169
158
  end
170
159
  end
171
160
  end
172
161
 
162
+ # Helper method to split sublemmas while respecting parentheses
163
+ # This ensures we don't split inside relation type lists like (sp,pp)
164
+ # @param text [String] text to split at commas outside of parentheses
165
+ # @return [Array<String>] resulting substrings
166
+ def smart_split_sublemmas(text)
167
+ result = []
168
+ current = ""
169
+ paren_level = 0
170
+
171
+ text.each_char do |c|
172
+ if c == ',' && paren_level == 0
173
+ # Only split on commas outside of parentheses
174
+ result << current unless current.empty?
175
+ current = ""
176
+ else
177
+ current << c
178
+ # Track parenthesis nesting level
179
+ if c == '('
180
+ paren_level += 1
181
+ elsif c == ')'
182
+ paren_level -= 1 if paren_level > 0
183
+ end
184
+ end
185
+ end
186
+
187
+ result << current unless current.empty?
188
+ result
189
+ end
190
+
173
191
  # Parse annotations like sp:past,pp:participle or pl:oxen
174
192
  # @param annotationsText [String] annotations string
175
193
  # @return [void]
@@ -215,7 +233,7 @@ module LexM
215
233
  if redirected?
216
234
  raise "Cannot add sublemmas to a redirection lemma"
217
235
  end
218
- @sublemmas << Sublemma.new(text)
236
+ @sublemmas << Sublemma.new(text, nil, self)
219
237
  self
220
238
  end
221
239
 
@@ -227,7 +245,7 @@ module LexM
227
245
  raise "Cannot add sublemmas to a redirection lemma"
228
246
  end
229
247
  texts.each do |text|
230
- @sublemmas << Sublemma.new(text)
248
+ @sublemmas << Sublemma.new(text, nil, self)
231
249
  end
232
250
  self
233
251
  end
@@ -241,7 +259,7 @@ module LexM
241
259
  raise "Cannot add sublemmas to a redirection lemma"
242
260
  end
243
261
  redirect = LemmaRedirect.new(target, types)
244
- @sublemmas << Sublemma.new(nil, redirect)
262
+ @sublemmas << Sublemma.new(nil, redirect, self)
245
263
  self
246
264
  end
247
265
 
@@ -257,6 +275,21 @@ module LexM
257
275
  self
258
276
  end
259
277
 
278
+ # Returns a hash mapping each sublemma to its shortcut
279
+ # @param placeholder [String] optional placeholder to use instead of "~" (default: "~")
280
+ # @return [Hash<String, String>] hash mapping full sublemma text to shortcut
281
+ def shortcuts(placeholder = "~")
282
+ return {} if @text.nil? || redirected? || @sublemmas.empty?
283
+
284
+ result = {}
285
+ @sublemmas.each do |sublemma|
286
+ # Skip redirections and get the shortcut for text sublemmas
287
+ next if sublemma.redirected? || sublemma.text.nil?
288
+ result[sublemma.text] = sublemma.shortcut(placeholder)
289
+ end
290
+ result
291
+ end
292
+
260
293
  # Validate annotation key and value format
261
294
  # Ensures keys and values follow the expected format
262
295
  # @param key [String] annotation key to validate
@@ -609,6 +609,52 @@ module LexM
609
609
  def [](index)
610
610
  @lemmas[index]
611
611
  end
612
+
613
+ # Sort the lemmas based on their headwords (non-destructive)
614
+ # @param block [Proc] optional custom sort proc
615
+ # @return [LemmaList] a new sorted lemma list
616
+ def sort(&block)
617
+ if block_given?
618
+ sorted_list = LemmaList.new
619
+ sorted_list.instance_variable_set(:@lemmas, @lemmas.sort(&block))
620
+ sorted_list
621
+ else
622
+ # Default sort by headword text
623
+ sorted_list = LemmaList.new
624
+ sorted_list.instance_variable_set(:@lemmas, @lemmas.sort_by { |lemma| lemma.text.to_s.downcase })
625
+ sorted_list
626
+ end
627
+ end
628
+
629
+ # Sort the lemmas based on their headwords (destructive)
630
+ # @param block [Proc] optional custom sort proc
631
+ # @return [LemmaList] self
632
+ def sort!(&block)
633
+ if block_given?
634
+ @lemmas.sort!(&block)
635
+ else
636
+ # Default sort by headword text
637
+ @lemmas.sort_by! { |lemma| lemma.text.to_s.downcase }
638
+ end
639
+ self
640
+ end
641
+
642
+ # Sort the lemmas using a custom key function (non-destructive)
643
+ # @param block [Proc] key function to extract sort keys from lemmas
644
+ # @return [LemmaList] a new sorted lemma list
645
+ def sort_by(&block)
646
+ sorted_list = LemmaList.new
647
+ sorted_list.instance_variable_set(:@lemmas, @lemmas.sort_by(&block))
648
+ sorted_list
649
+ end
650
+
651
+ # Sort the lemmas using a custom key function (destructive)
652
+ # @param block [Proc] key function to extract sort keys from lemmas
653
+ # @return [LemmaList] self
654
+ def sort_by!(&block)
655
+ @lemmas.sort_by!(&block)
656
+ self
657
+ end
612
658
 
613
659
  # Save to a file
614
660
  # @param filename [String] file to save to
data/lib/lexm/sublemma.rb CHANGED
@@ -14,16 +14,20 @@ module LexM
14
14
  attr_accessor :text, :redirect
15
15
  # Source location information
16
16
  attr_accessor :source_file, :source_line, :source_column
17
+ # Reference to parent lemma
18
+ attr_accessor :parent
17
19
 
18
20
  # Initialize a new sublemma
19
21
  # @param text [String, nil] the text of the sublemma (nil for pure redirections)
20
22
  # @param redirect [LemmaRedirect, nil] redirection information (nil for normal sublemmas)
23
+ # @param parent [Lemma, nil] parent lemma (optional)
21
24
  # @param source_file [String, nil] source file path
22
25
  # @param source_line [Integer, nil] source line number
23
26
  # @param source_column [Integer, nil] source column number
24
- def initialize(text = nil, redirect = nil, source_file = nil, source_line = nil, source_column = nil)
27
+ def initialize(text = nil, redirect = nil, parent = nil, source_file = nil, source_line = nil, source_column = nil)
25
28
  @text = text
26
29
  @redirect = redirect
30
+ @parent = parent
27
31
  @source_file = source_file
28
32
  @source_line = source_line
29
33
  @source_column = source_column
@@ -35,6 +39,37 @@ module LexM
35
39
  @text.nil? && !@redirect.nil?
36
40
  end
37
41
 
42
+ # Returns a shortened version of the sublemma text, replacing the lemma part with a placeholder
43
+ # For example, if the lemma is "work" and sublemma is "work out", this returns "~ out"
44
+ # @param placeholder [String] optional placeholder to use instead of "~" (default: "~")
45
+ # @return [String, nil] the shortened sublemma text or nil if this is a redirection sublemma or has no parent
46
+ def shortcut(placeholder = "~")
47
+ return nil if redirected? || @text.nil? || @parent.nil? || @parent.text.nil?
48
+
49
+ parent_text = @parent.text
50
+
51
+ # Check if the sublemma starts with the parent lemma
52
+ if @text.start_with?(parent_text)
53
+ # Replace the parent lemma with the placeholder
54
+ remainder = @text[parent_text.length..-1]
55
+
56
+ # If the remainder starts with a space, keep it
57
+ if remainder.start_with?(" ")
58
+ return "#{placeholder}#{remainder}"
59
+ elsif remainder.empty?
60
+ # For exact matches, just return the placeholder
61
+ return placeholder
62
+ else
63
+ # For cases where the lemma is a prefix but not a whole word
64
+ # (e.g., lemma "over", sublemma "overdo") - don't create a shortcut
65
+ return @text
66
+ end
67
+ else
68
+ # If the sublemma doesn't start with the parent lemma, return the full text
69
+ return @text
70
+ end
71
+ end
72
+
38
73
  # Convert to string representation
39
74
  # @return [String] the string representation of this sublemma
40
75
  def to_s
data/lib/lexm/version.rb CHANGED
@@ -9,5 +9,5 @@
9
9
  #############################################################
10
10
 
11
11
  module LexM
12
- VERSION = "0.3.0"
12
+ VERSION = "0.4.0"
13
13
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lexm
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yanis Zafirópulos
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-03-21 00:00:00.000000000 Z
11
+ date: 2025-03-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rspec