camdict 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,606 @@
1
+ require 'camdict/explanation'
2
+
3
+ module Camdict
4
+
5
+ # Parse an html definition to get explanations, word, IPA, pronunciation,
6
+ # part of speech, etc.
7
+
8
+ class Definition
9
+ # Struct IPA is the written pronunciations for UK/US.
10
+ # +uk+: the UK IPA; +k+: the superscript index in UK IPA.
11
+ # +us+: the US IPA; +s+: the superscript index in US IPA.
12
+ IPA = Struct.new(:uk, :k, :us, :s)
13
+ # Struct Pronunciation has two members.
14
+ # Each +uk+/+us+ has its own mp3/ogg links.
15
+ Pronunciation = Struct.new(:uk, :us)
16
+ # Struct Link has two members +mp3+ and +ogg+, which are the http links.
17
+ Link = Struct.new(:mp3, :ogg)
18
+
19
+ # Simple past, past participle, present participle of a verb. Only irregular
20
+ # verbs have these values. Its struct members are +sp+, +pp+, +pr+.
21
+ Irregular = Struct.new(:sp, :pp, :pr)
22
+ # Get part of speech of a word or phrase.
23
+ attr_reader :part_of_speech
24
+ # Get explanations for this definition.
25
+ attr_reader :explanations
26
+ # Is the queried word/phrase an idiom?
27
+ attr_reader :is_idiom
28
+ # Get the IPA
29
+ attr_reader :ipa
30
+ # Get the pronunciation
31
+ attr_reader :pronunciation
32
+ # Get the region: UK or US
33
+ attr_reader :region
34
+ # Get the short usage
35
+ attr_reader :usage
36
+ # Grammar code. Like U, means uncountable noun.
37
+ attr_reader :gc
38
+ # Get the guided word for this definition entry, which is usually just one
39
+ # word or a phrase. This does not exist when there is only one definition.
40
+ # It is useful when there are many definitions for one word to distinguish
41
+ # them.
42
+ attr_reader :guided
43
+ # Get the verb irregular form word. +word.verb.sp+ gets the simple past
44
+ # tense of this verb.
45
+ attr_reader :verb
46
+
47
# Build a definition for the searched +word+ from one dictionary entry.
# +entry_html+ is { entry ID => its html definition source }.
#
# The html source is parsed with Nokogiri and every attribute is extracted
# eagerly. The order of the assignments below matters: where? reads the
# title, variant, inflection, phrase and idiom values, and get_plural and
# get_irregular read @part_of_speech.
def initialize(word, entry_html)
  @word = word
  @entry_id, source = entry_html.flatten
  @html = Nokogiri::HTML(source)

  # Values that where? needs in order to locate the searched word.
  @title_word       = title_word         # String
  @derived_words    = derived_words      # String or [String]
  @spelling_variant = spell_variant      # String
  @head_variant     = get_head_variant   # [String]
  @body_variant     = get_body_variant   # [String]
  @inflection       = get_inflection     # [String]
  @phrase           = get_phrase         # [String]
  @is_idiom         = is_idiom?          # True or False

  # Values that depend on the location of the searched word.
  @part_of_speech   = pos                # String or [String] or []
  @explanations     = get_explanations   # [Camdict::Explanation]
  @ipa              = get_ipa            # Struct uk:String,us:String,k:[],s:[]
  @pronunciation    = get_pronunciation  # Struct uk:Link, us:Link
  @region           = get_region         # String
  @usage            = get_usage          # String
  @gc               = get_gc             # String
  @plural           = get_plural         # String or [String]
  @guided           = get_guided_word    # String
  @verb             = get_irregular      # Struct Irregular
end
72
+
73
+ private
74
# Get the definition page title word, which is either a word or a phrase.
# The title is needed because the page does not always show the searched
# word itself: searching baldness gets bald, as the online dictionary
# groups words with the same root under shared explanations.
#   <h2 class="di-title cdo-section-title-hw">look at sth</h2>
# css_text is not defined in this file; presumably it is provided by
# Camdict::Common, which this class includes.
def title_word
  css_text(".di-title.cdo-section-title-hw")
end
83
+
84
# Collect the derived words of this entry. Some words have more than one,
# like plagiarize, which has two.
# Return an Array of derived words, or nil when no derived word is found.
#   <span class="runon-title" title="Derived word">
#     <span class="w">plagiarism
def derived_words
  matches = @html.css('[title="Derived word"]')
  return if matches.empty?
  matches.map { |element| element.content }
end
92
+
93
# Get the variant words or phrases that sit inside the di-info block, which
# is part of the definition header. Variants inside a phrase block or a
# spelling variant are matched by other selectors and are not returned here.
# Typical cases are a UK/US variant, or a word with the same meaning but a
# different pronunciation. One entry may have several variants, such as
# ruby or aluminium (aluminum, Al).
# Return an Array of variant texts, or nil when there is none.
def get_head_variant
  variants = @html.css(".di-info .var .v[title='Variant form']")
  return if variants.empty?
  variants.map { |variant| variant.text }
end
104
+
105
# Get the variant found inside the di-body block. It is used to work out
# the part of speech of words such as e-book.
def get_body_variant
  css_text(".di-body .v[title='Variant form']")
end
110
+
111
# Get the spelling variant, which has the same pronunciation as the title
# word. For example, plagiarize has the spelling variant plagiarise.
def spell_variant
  selector = ".spellvar .v[title='Variant form']"
  css_text(selector)
end
116
+
117
# Get the inflection text shown in the definition header, for example an
# irregular plural like criteria.
def get_inflection
  css_text(".di-info .inf")
end
121
+
122
# Get the phrases of this entry together with their variants. The texts are
# returned as they appear on the page, i.e. they are not flattened yet.
# Return an Array of texts, or nil when the entry has no phrase.
def get_phrase
  phrases = @html.css(".phrase, .phrase-info .v[title='Variant form']")
  return if phrases.empty?
  phrases.map { |phrase| phrase.text }
end
127
+
128
# Work out where the searched word's part of speech, IPAs and
# pronunciations can be found on the page.
# Return one of "title", "idiom", "spellvar", "head_variant",
# "body_variant", "inflection", "derived", "phrase", or "unknown" when the
# word is found in none of those places. When the word is a derived word
# and was also found elsewhere, an Array [location, "derived"] is returned
# ('ruby' is both a title word and a derived word).
# Later checks override earlier ones, so the order of the checks matters.
def where?
  location = "title" if @word == @title_word
  if @title_word
    # A title like "set/put the seal on sth" lists alternatives; flatten is
    # not core String API - presumably a project extension that expands them.
    location = "title" if @title_word.include?("/") &&
                          @title_word.flatten.include?(@word)
    # The title may be missing, so the idiom check needs the same nil guard
    # as the check above.
    location = "idiom" if @is_idiom && @title_word.include?(@word)
  end
  # spelling variant is treated as "title word"
  location = "spellvar" if @spelling_variant && @spelling_variant.include?(@word)
  location = "head_variant" if @head_variant && @head_variant.include?(@word)
  location = "body_variant" if @body_variant && @body_variant.include?(@word)
  location = "inflection" if @inflection && @inflection.include?(@word)
  if @derived_words && @derived_words.include?(@word)
    # 'ruby' has two locations: title and derived
    location = location.nil? ? "derived" : [location, "derived"]
  end
  if @phrase
    # has? is not core Array API - presumably a project extension.
    location = "phrase" if @phrase.has?(@word) && @word.include?(" ")
    # rubbers has no space, but it's treated as a phrase.
    location = "phrase" if @phrase.include?(@word)
  end
  location || "unknown"
end
165
+
166
# Get the part of speech of the searched word, depending on where the word
# was found on the page (see where?).
#
# * When the searched word is a title word (or a variant or inflection):
#     <span class="di-info">
#   For noun, verb, adj, adv, pronoun, prep, conj, exclamation:
#     <span class="posgram">
#       <span class="pos" title="A word that ...">noun</span>
#   For a phrasal verb, like "reach out to sb":
#     <span class="anc-info-head">
#       <span class="pos" title="Verb with an adverb ...">phrasal verb</span>
# * When it is an idiom, the part of speech is simply "idiom". The page may
#   label it with a usage such as "saying" ("curiosity killed the cat") or
#   "informal" ("can't get your head around sth"), and many idioms, like
#   "set/put the seal on sth", have no di-info at all, but all should have
#   di-body idiom-block idiom-body.
# * When the searched word is a derived word:
#     <span class="runon">...<span class="runon-info">
#       <span class="posgram"><span class="pos">noun
# * When there is more than one part of speech on the same page, like
#   'ruby', all of them (adjective and noun) are returned.
# * When the dictionary has no direct answer, nothing is collected.
#
# Return a String when exactly one part of speech was found, otherwise an
# Array, which is empty when none was found.
def pos
  found = []
  Array(where?).each do |place|
    case place
    when 'title', 'head_variant', 'body_variant', 'spellvar', 'inflection'
      # A phrasal verb is marked in the anc-info-head block.
      nodes = @html.css(".anc-info-head > .pos")
      # center has two pos, noun and verb; centre has noun and adj.
      nodes = @html.css(".di-info .pos") if nodes.empty?
      found.concat(nodes.map { |n| n.text })
    when 'idiom'
      found << "idiom"
    when 'derived'
      derived_css(".runon-info .posgram .pos") { |node| found << node.text }
    end
    # 'unknown' and any other location: unknown or no part of speech.
  end
  found.length == 1 ? found.first : found
end
214
+
215
# Get the explanations of this definition.
# Every def-block directly under a sense-body becomes one
# Camdict::Explanation; this already covers a word found as the title, a
# head variant, a spelling variant or an inflection. For a derived word, a
# phrase or an idiom, the def-blocks of that specific block are appended.
# Return an Array of Camdict::Explanation.
def get_explanations
  sense_blocks = @html.css(".sense-body > .def-block")
  found = sense_blocks.map { |block| Camdict::Explanation.new(block) }
  Array(where?).each do |place|
    case place
    when 'derived'
      derived_css(".def-block") { |node| found << Camdict::Explanation.new(node) }
    when 'phrase'
      phrase_css(".def-block") { |node| found << Camdict::Explanation.new(node) }
    when 'idiom'
      # NOTE(review): unlike the branches above, the whole result of css is
      # handed to Explanation.new, not one block at a time - confirm that
      # Camdict::Explanation accepts that.
      found << Camdict::Explanation.new(@html.css(".idiom-block .def-block"))
    end
  end
  found
end
242
+
243
# Parse the html and check whether there is an idiom related block.
# Return true when the page has an idiom body, false otherwise, so that the
# public is_idiom reader always holds a real boolean.
def is_idiom?
  !@html.css(".idiom-block .idiom-body").empty?
end
248
+
249
# A word may have a UK and a US written pronunciation. Superscripts in an
# IPA are stored in an array, k for UK and s for US. The returned IPA
# Struct looks like
#   uk: String, us: String, k: [position1, length1, position2, length2],
#   s: [position, length]
# Position is the index of a superscript in the IPA, and the number after
# it is the length of that superscript.
# An empty IPA Struct is returned when the page has no IPA in its header,
# or when the word is a head variant that has no IPA of its own.
def get_ipa
  # UK is always the first one
  first_ipa = @html.at_css(".di-info .ipa")
  # phrase or idiom has no IPA
  return IPA.new if first_ipa.nil?
  ukbase = parse_ipa(first_ipa)
  # in most cases they are same
  usbase = ukbase
  Array(where?).each do |place|
    case place
    when 'title', 'spellvar'
      # US IPA is always followed by a symbol US
      # favorite: UK/US ipa (spellvar US s:favorite) => normal title word
      usnode = @html.css(".di-info img.ussymbol + .pron .ipa")
      # NOTE(review): the head_variant branch below calls empty? on the
      # same kind of css result, so this nil guard may never skip the
      # assignment - confirm whether empty? was intended.
      usbase = parse_ipa(usnode) unless usnode.nil?
    when 'inflection'
      usnode = @html.css(".info-group img.ussymbol + .pron .ipa")
      usbase = parse_ipa(usnode) unless usnode.nil?
      ukinfnode = @html.css(".info-group .pron .ipa")
      ukinf = parse_ipa(ukinfnode) unless ukinfnode.nil?
      # A hyphenated IPA is a short form that must be joined with the full one.
      usbase = join_ipa(ukbase, usbase) if usbase[:baseipa] && usbase[:baseipa].include?('-')
      ukbase = join_ipa(ukbase, ukinf) if ukinf[:baseipa] && ukinf[:baseipa].include?('-')
    when 'head_variant'
      # variant word's IPA can be got from its definition page when it is a
      # title word, or from the bracket. Like,
      # aluminium: UK ipa, (variant s:aluminum: US ipa) => in bracket
      # behove: UK ipa, US ipa (variant US s:behoove ipa) => in bracket
      # Many other variants have no IPA inside the bracket and title word's
      # IPA are not theirs.
      # eraser: UK ipa, US ipa US (variant UK s:rubber) => no IPA
      # plane: UK/US ipa (variant UK s:aeroplane, US s:airplane) => no IPA
      # aeroplane: UK ipa,US ipa (variant US s:airplane) => no IPA
      # ass: UK/US ipa, | variant UK s:arse => no IPA
      # sledge: UK ipa, (variant US s:sled) => no IPA
      # titbit: UK/US ipa, (variant US s:tidbit) => no IPA
      node = @html.css(".di-info .var .ipa")
      return IPA.new if node.empty?
      ukbase = usbase = parse_ipa(node)
      return IPA.new unless ukbase[:baseipa]
    when 'derived'
      derived_uk = nil
      derived_css('.ipa') do |node|
        head = node.first
        derived_uk = parse_ipa(head) unless head.nil?
      end
      derived_css("img.ussymbol + .pron .ipa") do |node|
        usbase = parse_ipa(node) unless node.nil?
      end
      if derived_uk && derived_uk[:baseipa].include?('-')
        ukbase = join_ipa(ukbase, derived_uk)
      elsif derived_uk
        # uk base may come from the derived word, such as fermentation.
        ukbase = derived_uk
      end
    end
  end
  # A US IPA that is still a short form is completed from the UK one.
  usbase = join_ipa(ukbase, usbase) if usbase[:baseipa] && usbase[:baseipa].include?('-')
  IPA.new(ukbase[:baseipa], ukbase[:sindex], usbase[:baseipa], usbase[:sindex])
end
322
+
323
# Parse an ipa node to get the ipa string and its superscript index.
# +node+ must respond to children and text; each child must respond to text
# and to ["class"].
# Return a Hash { baseipa: String, sindex: Array or nil }, where sindex is a
# flat list of [position, length] pairs, one pair for every child whose
# class is "sp", or nil when there is no such child.
def parse_ipa(node)
  offset = 0
  marks = node.children.each_with_object([]) do |child, acc|
    width = child.text.length
    acc.push(offset, width) if child["class"] == "sp"
    offset += width
  end
  { baseipa: node.text, sindex: (marks.empty? ? nil : marks) }
end
335
+
336
# A short IPA begins and/or ends with a hyphen and shares a common part
# with the full IPA. Return the joined result for the short one. The
# superscripts of the common part are kept, and those of the replaced part
# are taken from the short IPA.
#
# +full_sp+ and +short_sp+ are Hashes like the ones returned by parse_ipa:
# { baseipa: String, sindex: [position, length, ...] or nil }. A Hash of
# the same shape is returned.
#
# Raises RuntimeError when the short IPA has no leading or trailing hyphen,
# or when it cannot be matched against the full IPA.
#
# Examples of short forms (s: same as full, d: different):
#   understand -sd-; preparation -Sddss-; imaginary -dssds-
#   plagiarise -ssdddsss; dictionary -dsss; painting -sdss
#   harmfully -d
#   toxic ssddd-; privacy sssd-; formally sssd-; harmful ssssds-
def join_ipa(full_sp, short_sp)
  full, basesp = full_sp[:baseipa], full_sp[:sindex]
  short, ussp = short_sp[:baseipa], short_sp[:sindex]
  slen = short.length
  flen = full.length
  if short[0] == '-'
    if short[-1] == '-'
      # head-tail hyphen: a middle part of the full IPA is replaced
      center = short[1, slen - 2]
      # "-" or "--" carries nothing to match
      raise "unmatched head-tail hyphen IPA" if center.nil? || center.empty?
      position = full.index(center[0])
      # match left: center starts at the first occurrence of its first char
      if position && (slen - 2 < flen - 1 - position)
        tail = (position + slen - 2)..(flen - 1)
        # Exclusive ranges keep the prefix empty when position is 0;
        # full[0..-1] would return the whole string instead.
        findex = mix_spi(basesp, 0...position, ussp, position - 1,
                         basesp, tail)
        ret = full[0...position] + center + full[tail]
        return { baseipa: ret, sindex: findex }
      end
      position = full.index(center[-1])
      # match right: center ends at the first occurrence of its last char
      if position && (position + 1 > slen - 2)
        keep = position - slen + 3
        tail = (position + 1)..(flen - 1)
        findex = mix_spi(basesp, 0...keep, ussp, keep - 1, basesp, tail)
        ret = full[0...keep] + center + full[tail]
        return { baseipa: ret, sindex: findex }
      end
      raise "unmatched head-tail hyphen IPA"
    else
      # head hyphen: the end of the full IPA is replaced
      right = short[1, slen - 1]
      position = full.index(right[0])
      # match left; no length check here because plagiarism fails it
      if position
        findex = mix_spi(basesp, 0...position, ussp, position - 1)
        ret = full[0...position] + right
        return { baseipa: ret, sindex: findex }
      end
      position = full.index(right[-1])
      # match right
      if position && (position + 1 >= slen - 1)
        keep = position - slen + 2
        findex = mix_spi(basesp, 0...keep, ussp, keep - 1)
        ret = full[0...keep] + right
        return { baseipa: ret, sindex: findex }
      end
      # unmatched case, like harmfulness: the short part is appended
      findex = mix_spi(basesp, 0..(flen - 1), ussp, flen - 1)
      { baseipa: full + right, sindex: findex }
    end
  elsif short[-1] == '-'
    # tail hyphen: the beginning of the full IPA is replaced
    left = short[0, slen - 1]
    # When the short form starts with a secondary stress or stress mark, the
    # character after the mark has to match as well.
    stressed = ["\u{2cc}", "\u{2c8}"].include?(left[0])
    matched = stressed ? left[0, 2] == full[0, 2] : left[0] == full[0]
    # A mismatch after a stress mark used to fall through and return nil.
    raise "tail hyphen has uncovered case - code needs update." unless matched
    rest = (slen - 1)..(flen - 1)
    ret = left + full[rest]
    findex = mix_spi(ussp, 0, basesp, rest)
    { baseipa: ret, sindex: findex }
  else
    raise "IPA doesn't begin or end with a hyphen, nothing is done."
  end
end

# Select the superscripts whose position is included by +range+.
# +spindex+ is a flat list [position, length, position, length, ...] or nil.
# Return the selected pairs as a flat list, or nil when +spindex+ is nil or
# no superscript is in the range.
def at_range(spindex, range)
  return if spindex.nil?
  kept = spindex.each_slice(2).select { |position, _len| range.include?(position) }
  kept.empty? ? nil : kept.flatten
end

# Mix the superscript indexes. Return the mixed result, or nil when there
# is no superscript.
# The arguments come in pairs: a superscript index followed by a Range or
# an Integer. With a Range, only the superscripts positioned in that range
# are kept. With an Integer, every position of the superscript index is
# increased by that number. A pair whose superscript index is nil is
# skipped. The kept pairs are joined in argument order.
def mix_spi(*p)
  findex = []
  p.each_slice(2) do |spindex, r_or_n|
    next unless spindex
    case r_or_n
    when Range
      aindex = at_range(spindex, r_or_n)
      findex += aindex if aindex
    when Integer
      # Integer replaces Fixnum, which no longer exists in current Ruby.
      spindex.each_slice(2) { |pos, len| findex += [pos + r_or_n, len] }
    end
  end
  findex.empty? ? nil : findex
end
450
+
451
# Get the UK/US pronunciation mp3/ogg links.
# The links are read from the a.pron-uk and a.pron-us anchors: from the
# definition header when the word is a title word or a spelling variant,
# and from the derived word block when it is a derived word.
# Return a Pronunciation Struct whose +uk+ and +us+ are Link Structs. A
# Link without values is used when no anchor was found.
def get_pronunciation
  # Build a Link from the first anchor of a node collection.
  link_for = lambda do |anchors|
    return Link.new if anchors.empty?
    anchors.each { |a| return Link.new(a['data-src-mp3'], a['data-src-ogg']) }
  end
  uk_anchors = us_anchors = []
  Array(where?).each do |place|
    case place
    when 'title', 'spellvar'
      uk_anchors = @html.css(".di-info a.pron-uk")
      us_anchors = @html.css(".di-info a.pron-us")
    when 'derived'
      derived_css("a.pron-uk") { |node| uk_anchors = node }
      derived_css("a.pron-us") { |node| us_anchors = node }
    end
  end
  Pronunciation.new(link_for.call(uk_anchors), link_for.call(us_anchors))
end
484
+
485
# Get a word or phrase's region. Possible values: UK, US.
# The place to read it from depends on where the searched word was found
# (see where?). Return nil when the location has no region handling.
def get_region
  region = nil
  Array(where?).each do |place|
    case place
    when 'title', 'idiom'
      region = css_text(".di-info > .lab .region")
      # Fall back to the whole label when it has no region element.
      region = css_text(".di-info > .lab") if !region || region.empty?
    when 'spellvar'
      region = css_text(".spellvar .region")
    when 'head_variant'
      region = css_text(".di-info .var .region")
    when 'derived'
      derived_css(".region") { |node| region = node.text unless node.empty? }
    when 'phrase'
      phrase_css(".region") { |node| region = node.text unless node.empty? }
    end
  end
  region
end
511
+
512
# Parse and get the short usage label.
# The place to read it from depends on where the searched word was found
# (see where?). Return nil when the location has no usage handling.
def get_usage
  usage = nil
  Array(where?).each do |place|
    case place
    when 'title', 'idiom', 'spellvar'
      usage = css_text(".di-info > .lab .usage")
    when 'head_variant'
      usage = css_text(".di-info .var .usage")
    when 'derived'
      derived_css(".usage") { |node| usage = node.text unless node.empty? }
    when 'phrase'
      phrase_css(".usage") { |node| usage = node.text unless node.empty? }
    end
  end
  usage
end
535
+
536
# Get the grammar code, like U for an uncountable noun.
# The place to read it from depends on where the searched word was found
# (see where?). Return nil when the location has no grammar code handling.
def get_gc
  code = nil
  Array(where?).each do |place|
    case place
    when 'title', 'idiom', 'spellvar', 'head_variant'
      code = css_text(".di-info .gcs")
    when 'derived'
      derived_css(".gcs") { |node| code = node.text unless node.empty? }
    when 'phrase'
      phrase_css(".gcs") { |node| code = node.text unless node.empty? }
    end
  end
  code
end
557
+
558
# Get the plural form(s) listed in the definition header of a noun.
# Return values: a String when there is one plural form, an Array of
# Strings when there are several (fish has two), or nil when there is none
# or when the part of speech does not include 'noun'.
# NOTE(review): when @part_of_speech is a String, include? is a substring
# test, so "pronoun" passes this check too - confirm that this is intended.
def get_plural
  return unless @part_of_speech.include?('noun')
  forms = @html.css(".di-info .inf-group[type='plural'] .inf")
  case forms.size
  when 0 then nil
  when 1 then forms.text
  else forms.map { |form| form.text }
  end
end
573
+
574
# Parse and get the guided word from the definition header, with the
# characters ( and ) removed. Return nil when there is no guided word.
def get_guided_word
  raw = css_text(".di-info .gw")
  raw ? raw.delete("()") : nil
end
579
+
580
# Get the irregular forms of a verb.
# Return an Irregular Struct (simple past, past participle, present
# participle), or nil when the part of speech does not include 'verb' or
# when none of the forms is found.
# The past forms are looked up in this order: a shared 'past' inflection
# group, then the plain inf spans of the header (first one is the simple
# past, second one the past participle), then the separate 'past-tense'
# and 'past-part' groups.
def get_irregular
  return unless @part_of_speech.include?('verb')
  participle = css_text(".di-info .inf-group[type='pres_part'] .inf")
  simple = perfect = css_text(".di-info .inf-group[type='past'] .inf")
  if simple.nil? || simple.empty?
    forms = @html.css(".di-info span[class='inf']").map { |n| n.text }
    simple, perfect = forms unless forms.empty?
  end
  simple = css_text(".di-info .inf-group[type='past-tense'] .inf") if simple.nil?
  perfect = css_text(".di-info .inf-group[type='past-part'] .inf") if perfect.nil?
  Irregular.new(simple, perfect, participle) if simple || perfect || participle
end
599
+
600
+ include Camdict::Common
601
+ # Limitation: some irregular words are not reachable(phenomena, arisen)
602
+ # because they are not shown on the search result page. They can be got
603
+ # by their original forms - phenomenon, arise.
604
+
605
+ end
606
+ end