dphil 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. checksums.yaml +7 -0
  2. data/CODE_OF_CONDUCT.md +49 -0
  3. data/Gemfile +6 -0
  4. data/LICENSE +201 -0
  5. data/README.md +54 -0
  6. data/Rakefile +11 -0
  7. data/dphil.gemspec +49 -0
  8. data/exe/dphil +10 -0
  9. data/lib/dphil.rb +53 -0
  10. data/lib/dphil/cache.rb +15 -0
  11. data/lib/dphil/change_list.rb +6 -0
  12. data/lib/dphil/character.rb +236 -0
  13. data/lib/dphil/character_matrix.rb +102 -0
  14. data/lib/dphil/cli.rb +26 -0
  15. data/lib/dphil/cli_commands/csv2ld.rb +71 -0
  16. data/lib/dphil/cli_commands/csv2nex.rb +37 -0
  17. data/lib/dphil/constants.rb +128 -0
  18. data/lib/dphil/converter.rb +58 -0
  19. data/lib/dphil/converters/csv2nex.rb +83 -0
  20. data/lib/dphil/ld_data_set.rb +25 -0
  21. data/lib/dphil/ld_output.rb +29 -0
  22. data/lib/dphil/lemma.rb +44 -0
  23. data/lib/dphil/lemma_list.rb +179 -0
  24. data/lib/dphil/log_formatter.rb +39 -0
  25. data/lib/dphil/logger.rb +27 -0
  26. data/lib/dphil/metrical_data.rb +78 -0
  27. data/lib/dphil/newick.rb +52 -0
  28. data/lib/dphil/paup.rb +34 -0
  29. data/lib/dphil/refinements.rb +8 -0
  30. data/lib/dphil/refinements/natural_sort.rb +52 -0
  31. data/lib/dphil/script_string.rb +124 -0
  32. data/lib/dphil/syllables.rb +43 -0
  33. data/lib/dphil/syllables/syllable.rb +45 -0
  34. data/lib/dphil/tei_xml.rb +142 -0
  35. data/lib/dphil/transliterate.rb +131 -0
  36. data/lib/dphil/tree.rb +142 -0
  37. data/lib/dphil/tree_node.rb +67 -0
  38. data/lib/dphil/verse.rb +25 -0
  39. data/lib/dphil/verse_analysis.rb +509 -0
  40. data/lib/dphil/verse_analysis_new.rb +816 -0
  41. data/lib/dphil/version.rb +30 -0
  42. data/vendor/default_commands.paup +18 -0
  43. data/vendor/metrical_data.yml +4035 -0
  44. metadata +409 -0
# frozen_string_literal: true

Dphil::CLI.module_eval do
  desc "Convert a CSV-format collation file into a NEXUS file"
  long_desc <<~EOS
    Convert a CSV-format collation file into a NEXUS file for use with PAUP.
    This expects each column of the CSV to represent data for a single taxon,
    and the first row to contain the names of the taxa.
  EOS

  arg :csv_file

  command :csv2nex do |c|
    c.desc "Transpose rows/columns in CSV"
    c.switch :t, :transpose, negatable: false

    c.desc "Include custom PAUP commands from a file in PAUP block of NEXUS output"
    c.flag :d, :paup_data, arg_name: "file"

    c.desc "Write NEXUS output to file instead of STDOUT"
    c.flag :o, :outfile, arg_name: "file"

    # Convert the CSV named by the first argument, then either write the
    # NEXUS text to --outfile or print it to STDOUT.
    c.action do |_global, cmd_opts, cmd_args|
      nexus_text = Dphil::Csv2NexConverter.new(cmd_args[0], cmd_opts).convert

      if cmd_opts[:outfile]
        target = Pathname.new(cmd_opts[:outfile]).expand_path
        shown = target.relative_path_from(Pathname.getwd)
        # File.write returns the number of bytes written.
        puts "#{File.write(target, nexus_text)} bytes written to #{shown}"
        puts "You can process this file using PAUP with the command\n" \
             "`paup4 [options] #{shown}`"
      else
        puts nexus_text
      end
    end
  end
end
# frozen_string_literal: true

require "set"

module Dphil
  # Shared constants for the gem: the DEBUG flag, regular expressions for
  # SLP1-encoded Sanskrit syllables, and the JSON-LD contexts/types consumed
  # by the Dphil::LDOutput mixin.
  module Constants
    using ::Ragabash::Refinements

    # True under a Rails environment starting with "dev", or when the
    # RUBY_ENV environment variable starts with "dev"; false otherwise.
    DEBUG = if defined?(::Rails) && ::Rails.env[/^dev/]
      true
    elsif !ENV["RUBY_ENV"].nil? && ENV["RUBY_ENV"][/^dev/]
      true
    else
      false
    end

    # Regular expressions for SLP1 syllables
    begin
      vow = "aAiIuUfFxXeEoO"                  # SLP1 vowel characters
      con = "kKgGNcCjJYwWqQRtTdDnpPbBmyrlvzSsh" # SLP1 consonant characters
      add = "MH"                              # anusvara/visarga marks

      # One syllable: optional apostrophe (avagraha in SLP1), leading
      # consonants, a vowel, then trailing consonants/marks not followed by
      # another vowel.
      R_SYL = /[']?[#{con}]*[\s]*[#{vow}][#{con}#{add}]*(?![#{vow}])\s*/
      # Characters marking a (presumably metrically heavy) syllable:
      # long vowels/diphthongs, or a final anusvara/visarga.
      R_GSYL = /[AIUFXeEoO]|[MH]$/
      # Consonant cluster at end of string (F = "final") vs. anywhere.
      R_CCONF = /[#{con}]{2}$/
      R_CCON = /[#{con}]{2}/
    end

    # Markers for {#...#} control words in transliteration input, their
    # captured content, and the processed #<sha1># placeholder form.
    TRANS_CTRL_WORD = /\{#.*?#\}/
    TRANS_CTRL_WORD_CONTENT = /\{#(.*?)#\}/
    TRANS_CTRL_WORD_PROCESSED = /#[a-f0-9]{40}#/

    # Linked Data types and contexts
    # Each context below pairs camelCase and snake_case keys mapping to the
    # same IRI, so either naming convention is accepted in LD output.
    begin
      ld_context_global = {
        "@version" => 1.1,
        "oa" => "http://www.w3.org/ns/oa#",
        "dc" => "http://purl.org/dc/elements/1.1/",
        "xsd" => "http://www.w3.org/2001/XMLSchema#",
        "ubcs" => "http://ld.ubcsanskrit.ca/api#",
        "id" => { "@id" => "dc:identifier" },
      }

      ld_context_character = {
        "states" => { "@id" => "ubcs:charStateBySymbol", "@container" => "@index" },
        "symbols" => { "@id" => "ubcs:charSymbolByState", "@container" => "@index" },
        "stateTotals" => { "@id" => "ubcs:charStateTotalsByState", "@container" => "@index" },
        "state_totals" => { "@id" => "ubcs:charStateTotalsByState", "@container" => "@index" },
        "taxaStates" => { "@id" => "ubcs:charStateByTaxon", "@container" => "@index" },
        "taxa_states" => { "@id" => "ubcs:charStateByTaxon", "@container" => "@index" },
        "statesTaxa" => { "@id" => "ubcs:taxonByCharState", "@container" => "@index" },
        "states_taxa" => { "@id" => "ubcs:taxonByCharState", "@container" => "@index" },
        "isInformative" => { "@id" => "ubcs:charStateIsInformative" },
        "is_informative" => { "@id" => "ubcs:charStateIsInformative" },
        "isConstant" => { "@id" => "ubcs:charStateIsConstant" },
        "is_constant" => { "@id" => "ubcs:charStateIsConstant" },
      }

      ld_context_matrix = {
        "taxaNames" => { "@id" => "dc:identifier", "@container" => "@index" },
        "taxa_names" => { "@id" => "dc:identifier", "@container" => "@index" },
        "characters" => {
          "@id" => "ubcs:phyloCharacter",
          "@container" => "@index",
          "@context" => ld_context_character,
        },
      }

      ld_context_tree_node = {
        "name" => { "@id" => "ubcs:treeNodeName" },
        "length" => { "@id" => "ubcs:branchLength" },
        "parent" => { "@id" => "ubcs:treeNodeParent" },
        "children" => { "@id" => "ubcs:treeNodeChildren" },
      }

      ld_context_tree = {
        "rootId" => { "@id" => "ubcs:treeRootId" },
        "root_id" => { "@id" => "ubcs:treeRootId" },
        "nodes" => {
          "@id" => "ubcs:treeNode",
          "@container" => "@index",
          "@context" => ld_context_tree_node,
        },
        "stats" => {
          "@id" => "ubcs:treeStats",
          "@context" => {
            "ci" => { "@id" => "ubcs:treeCI" },
            "ciEx" => { "@id" => "ubcs:treeCIEx" },
            "ci_ex" => { "@id" => "ubcs:treeCIEx" },
            "hi" => { "@id" => "ubcs:treeHI" },
            "hiEx" => { "@id" => "ubcs:treeHIEx" },
            "hi_ex" => { "@id" => "ubcs:treeHIEx" },
            # NOTE(review): "treeLengh" looks like a typo for "treeLength",
            # but it is a published vocabulary IRI — confirm with the ontology
            # before renaming, since consumers may depend on the current form.
            "length" => { "@id" => "ubcs:treeLengh" },
            "rc" => { "@id" => "ubcs:treeRC" },
            "ri" => { "@id" => "ubcs:treeRI" },
          },
        },
      }

      ld_context_dataset = {
        "matrix" => {
          "@id" => "ubcs:characterMatrix",
          "@context" => ld_context_matrix,
        },
        "trees" => {
          "@id" => "ubcs:tree",
          "@container" => "@index",
          "@context" => ld_context_tree,
        },
      }

      # @type values for each LD-emitting class, keyed by class name.
      LD_TYPES = {
        "Dphil::Character" => "ubcs:phyloCharacter",
        "Dphil::CharacterMatrix" => "ubcs:characterMatrix",
        "Dphil::TreeNode" => "ubcs:treeNode",
        "Dphil::Tree" => "ubcs:tree",
        "Dphil::LDDataSet" => "ubcs:dataSet",
      }.deep_freeze

      # Full @context for each LD-emitting class: global context merged with
      # the class-specific context above.
      LD_CONTEXTS = {
        "Dphil::Character" => ld_context_global.merge(ld_context_character),
        "Dphil::CharacterMatrix" => ld_context_global.merge(ld_context_matrix),
        "Dphil::TreeNode" => ld_context_global.merge(ld_context_tree_node),
        "Dphil::Tree" => ld_context_global.merge(ld_context_tree),
        "Dphil::LDDataSet" => ld_context_global.merge(ld_context_dataset),
      }.deep_freeze
    end
  end
end
# frozen_string_literal: true

module Dphil
  #
  # Base module for file converters (CSV, NEXUS, CollateX, etc.)
  #
  # Mixed into concrete converter classes (e.g. Csv2NexConverter); every
  # helper below is private to the including class.
  module Converter
    private

    # Read an entire file into a String.
    #
    # infile - path to the file
    #
    # Returns the file contents.
    # Raises IOError if the file does not exist.
    def load_file(infile)
      raise IOError, "File #{infile} not found." unless File.exist?(infile)
      File.read(infile)
    end

    # Read a CSV file into an Array of row Arrays.
    #
    # infile - path to the file
    # mode   - IO open mode (e.g. "r:bom|utf-8" to strip a leading BOM)
    #
    # Raises IOError if the file does not exist.
    def load_csv(infile, mode = "r")
      raise IOError, "File #{infile} not found." unless File.exist?(infile)
      CSV.read(infile, mode)
    end

    # Return a hash of value => occurrence count, ordered by descending
    # count and, for equal counts, by first appearance in the hash.
    def weighted_uniq(array)
      weighted = array.each_with_object({}) do |value, acc|
        acc[value] = (acc[value] || 0) + 1
      end
      # sort_by.with_index replaces the original external mutable counter:
      # the index serves only as a stable tie-breaker, so insertion order is
      # preserved among equal counts — identical ordering, clearer intent.
      weighted.sort_by.with_index { |(_, count), index| [-count, index] }.to_h
    end

    # Sanitize a character string to basic KH/ASCII: transliterate to
    # Kyoto-Harvard, replace whitespace with "_", and swap "'" for "`"
    # (single quotes delimit labels in NEXUS output).
    def sanitize_char(str)
      str = str.to_s
      # Fall back to IAST when script detection fails.
      src = Sanscript.detect(str) || :iast
      # Non-mutating chain: the original's gsub!/tr!/strip! sequence worked
      # only because the bang results were never chained (they return nil on
      # no-op); this form yields the identical string without that hazard.
      Sanscript.transliterate(str, src, :kh).gsub(/\s/, "_").tr("'", "`").strip
    end

    # Tokenize the values of a character (one collation row) into
    # { sanitized_reading => [symbol, count] }, assigning symbols from
    # ALPHABET in order of descending frequency. Empty readings are dropped.
    def tokenize(characters)
      char_set = weighted_uniq(characters.map { |c| sanitize_char(c) }.reject(&:empty?))
      char_set.each_with_object({}).with_index do |(char, acc), i|
        acc[char[0]] = [ALPHABET[i], char[1]]
      end
    end

    # NEX Token Alphabet: 52 case-sensitive single-character state symbols.
    ALPHABET = IceNine.deep_freeze(("A".."Z").to_a + ("a".."z").to_a)
    private_constant :ALPHABET
  end
end
# frozen_string_literal: true

module Dphil
  #
  # CSV to NEXUS file converter class
  #
  # Expects each CSV column to hold the readings of one taxon and the first
  # row to hold the taxa names (see the csv2nex CLI command).
  class Csv2NexConverter
    include Dphil::Converter

    # csv_file - path to the collation CSV
    # opts     - :transpose  swap CSV rows/columns before converting
    #            :paup_data  file of PAUP commands to embed (default:
    #                        vendor/default_commands.paup under GEM_ROOT)
    def initialize(csv_file, opts = {})
      opts = opts.to_h

      # Load csv file ("r:bom|utf-8" strips a UTF-8 BOM if present)
      @csv = load_csv(csv_file, "r:bom|utf-8")
      @csv = @csv.transpose if opts[:transpose]

      # Load paup file
      if opts[:paup_data].nil?
        opts[:paup_data] = File.join(GEM_ROOT, "vendor", "default_commands.paup")
      end
      @paup = load_file(opts[:paup_data])
      # Normalize the PAUP commands: guarantee a trailing newline and indent
      # so the text nests inside the BEGIN PAUP block of the heredoc below.
      # NOTE(review): #blank? and #indent! are not core String methods — they
      # presumably come from a refinement/extension loaded elsewhere; verify.
      @paup << "\n" unless @paup.blank? || @paup[-1] == "\n"
      @paup.indent!(2)
      @paup.freeze
    end

    # Perform the conversion and return a string result
    def convert
      # Setup taxa information and orientation
      # NOTE(review): an empty CSV makes @csv.first nil and raises here —
      # confirm upstream guarantees at least a header row.
      taxa_count = @csv.first.count
      character_count = @csv.count - 1
      # NEXUS-safe taxa labels: every non-alphanumeric character becomes "_".
      taxa_labels = @csv.first.map { |name| name.to_s.strip.scrub.gsub(/[^A-Za-z0-9]/, "_") }

      # Generate labels and matrix
      character_labels = []
      # One row per taxon, seeded with its label; state symbols appended below.
      character_matrix = taxa_labels.map { |t| [t] }
      (1..character_count).each do |r|
        row = @csv[r]
        # reading => [symbol, count], most frequent reading first
        token_hash = tokenize(row)
        character_label = (token_hash.map do |k, _|
          "'#{sanitize_char(k)}'"
        end).join(" ")
        character_labels << %(#{r} /#{character_label})
        row.each_with_index do |charstate, i|
          token = token_hash[sanitize_char(charstate)]
          # "-" (gap) for readings tokenize rejected, i.e. empty cells.
          character_matrix[i] << (token.nil? ? "-" : token[0])
        end
      end
      # Collapse each taxon row to "label SYMBOLS..." for the MATRIX section.
      character_matrix.map! do |arr|
        "#{arr.shift} #{arr.join('')}"
      end

      # Return NEXUS output
      <<~NEXUS_EOF
        #NEXUS

        BEGIN TAXA;
        TITLE Manuscripts;
        DIMENSIONS NTAX=#{taxa_count};
        TAXLABELS #{taxa_labels.join(' ')};
        END;

        BEGIN CHARACTERS;
        TITLE Variant_Matrix;
        DIMENSIONS NCHAR=#{character_count};
        FORMAT DATATYPE = STANDARD RESPECTCASE GAP = - MISSING = ? SYMBOLS = "#{ALPHABET.join(' ')}";
        CHARSTATELABELS #{character_labels.join(', ')};
        MATRIX
        #{character_matrix.join("\n ")}
        ;

        END;

        BEGIN ASSUMPTIONS;
        OPTIONS DEFTYPE = UNORD;
        END;

        BEGIN PAUP;
        #{@paup}END;
      NEXUS_EOF
    end
  end
end
# frozen_string_literal: true

module Dphil
  # Linked-data wrapper pairing a character matrix with a set of trees; the
  # LDOutput mixin builds JSON-LD from the #as_json form below.
  class LDDataSet
    include Dphil::LDOutput

    attr_reader :matrix, :trees

    # Both components are required keyword arguments.
    def initialize(matrix:, trees:)
      @matrix = matrix
      @trees = trees
    end

    # Hash form used as the basis for JSON serialization.
    def to_h
      { matrix: @matrix, trees: @trees }
    end

    def as_json(options = nil)
      to_h.as_json(options)
    end
  end
end
# frozen_string_literal: true

module Dphil
  #
  # Mixin module for Linked Data output
  #
  # Requires that a class implements +#as_json+
  #
  module LDOutput
    using Dphil::Refinements::NaturalSort

    # Outputs a Linked Data Hash
    #
    # options - :context  override the @context (default from LD_CONTEXTS)
    #           :ld_type  override the @type (default from LD_TYPES)
    #           :compact  pass false to return the expanded form
    #           remaining options are forwarded to #as_json.
    def as_jsonld(**options)
      ld = {
        "@context" => options.delete(:context) || Constants::LD_CONTEXTS[self.class.name],
        "@type" => options.delete(:ld_type) || Constants::LD_TYPES[self.class.name],
      }.merge!(as_json(options))

      ld_expanded = JSON::LD::API.expand(ld)
      return ld_expanded if options[:compact] == false

      # Compact against the chosen context and emit the @context key first,
      # with the remaining keys in natural sort order.
      ld_compact = JSON::LD::API.compact(ld_expanded, ld["@context"])
      { "@context" => ld_compact.delete("@context") }.merge!(ld_compact.natural_sort_keys)
    end

    # Serialize the Linked Data Hash to a JSON string.
    def to_jsonld(**options)
      # Splat the keywords through: the original passed the bare Hash
      # positionally, which raises ArgumentError under Ruby 3's separation
      # of positional and keyword arguments.
      as_jsonld(**options).to_json(options)
    end
  end
end
# frozen_string_literal: true

module Dphil
  # Public: A storage object for words and groups of words from TEI XML data.
  # Also contains information about the source/location of the words.
  # Immutable.
  class Lemma
    using ::Ragabash::Refinements
    # Public: Returns the raw source data for the lemma.
    attr_reader :source, :text, :page, :facs, :line, :index

    # Public: Initialize a lemma object.
    #
    # source - XML data to initialize the lemma from
    # index  - ordinal position of this lemma within its list (or nil)
    def initialize(source = "", index = nil)
      @source = source.strip
      @index = index

      # Strict parsing with entity substitution; malformed markup raises.
      xml = Nokogiri::XML("<lemma>#{source}</lemma>") { |config| config.strict.noent }
      xml.encoding = "UTF-8"

      # Text with hyphen runs (and any whitespace after them) removed, i.e.
      # words split across breaks are rejoined; page/facsimile/line values
      # are collected from pb/lb attributes, comma-joined when multiple.
      @text = xml.text.strip.gsub(/\-+\s*\-*/, "")
      @page = xml.css("pb").map { |el| el.attr("n") }.join(",")
      @facs = xml.css("pb").map { |el| el.attr("facs") }.join(",")
      @line = xml.css("lb").map { |el| el.attr("n") }.join(",")
    rescue Nokogiri::XML::SyntaxError => e
      # NOTE(review): aborting the whole process from a library constructor
      # is drastic — consider raising instead; confirm no caller relies on
      # the abort behavior before changing it.
      $stderr.puts "Error in Lemma.new(`#{source}`, ...): #{e}"
      abort
    end

    def to_s
      "(#{index}|#{page}:#{line}) #{text}"
    end

    def to_sym
      "<Lemma>#{self}".to_sym
    end

    # Lemmata are equal when their raw source strings match.
    # NOTE(review): `==` has no matching `eql?`/`hash`, so equal lemmata do
    # not collide as Hash keys — confirm that is intended.
    def ==(other)
      return false unless other.is_a?(Dphil::Lemma)
      source == other.source
    end
  end
end
# frozen_string_literal: true

require "nokogiri"

module Dphil
  # An object containing a list of lemmata generated through SAX parsing of an
  # XML document.
  # Immutable.
  class LemmaList < ::Nokogiri::XML::SAX::Document
    using ::Ragabash::Refinements
    include Enumerable

    attr_reader :name

    # Parse +source+ (XML text) immediately: the SAX callbacks below split
    # the document into whitespace-separated words (keeping hyphen-split
    # words together) and accumulate them as Lemma objects in @members.
    def initialize(source)
      @members = []
      source = source.to_s.strip
      return if source.empty?
      # Structural wrapper tags that never appear in lemma source text;
      # pb/lb are additionally skipped on close (they are self-closing).
      @lemma_ignore_start_tags = Set.new(%w[TEI text body pre post div])
      @lemma_ignore_end_tags = @lemma_ignore_start_tags + Set.new(%w[pb lb])
      @index = 0              # index assigned to the next appended lemma
      @open_elements = []     # tags opened but not yet closed
      @current_pb = []        # pending page-break tags
      @current_lb = []        # pending line-break tags
      @current_chars = ""     # text accumulated for the current lemma
      @current_lemma = []     # XML/text fragments of the current lemma
      @inside_hyphen = false  # true while joining a hyphen-split word
      @empty_element = true   # true when the last-opened tag has no content

      @parser = Nokogiri::XML::SAX::Parser.new(self)
      @parser.parse(source)
    end

    def each(&block)
      @members.each(&block)
    end

    # Return all members, or the first +limit+ when given a number.
    def members(limit = nil)
      return @members[0, limit] if limit.is_a? Numeric
      @members
    end

    def [](*args)
      @members[*args]
    end

    # 1-based accessor; indices below 1 are clamped to 1 with a warning.
    def get(index)
      raise "Non-numeric index passed to Lemma.get" unless index.is_a? Numeric
      if index < 1
        warn "Minimum index of Lemma.get() is 1"
        index = 1
      end
      @members[index - 1]
    end

    def size
      @members.size
    end

    def to_s
      @members.map(&:text).join("\n")
    end

    # CollateX-style token hashes: raw text (t), IAST-normalized form (n),
    # and the lemma's index/page/facsimile/line metadata.
    def cx_tokens
      @members.map do |lemma|
        out = {
          t: lemma.text,
          n: Transliterate.normalize_iast(lemma.text),
          i: lemma.index,
          p: lemma.page,
          f: lemma.facs,
          l: lemma.line,
        }
        warn "Token empty: #{out}" if out[:t].empty?
        out
      end
    end

    private

    # SAX: record an opening tag. pb/lb are stored separately (per break
    # kind) so finalize can prepend the break that applies to each lemma;
    # other tags are tracked in @open_elements until closed.
    def start_element(name, attrs = [])
      return if @lemma_ignore_start_tags.include?(name)

      if %w[pb lb].include?(name)
        el = gen_xmlel(name, attrs, true)
        if @current_lemma.empty?
          # No lemma in progress: this break replaces the pending one.
          instance_variable_set("@current_#{name}", [el])
        else
          instance_variable_get("@current_#{name}") << el
        end
      else
        el = gen_xmlel(name, attrs)
        @open_elements << gen_xmlel(name, attrs)
      end

      @empty_element = true
      @current_lemma << el unless el.empty?
    end

    # SAX: record a closing tag; an element closed with no content is
    # rewritten in place to self-closing form.
    def end_element(name)
      return if @lemma_ignore_end_tags.include?(name)

      if @empty_element
        @current_lemma[-1] = @current_lemma[-1].gsub(%r{/*>\z}, "/>")
        @empty_element = false
      else
        @current_lemma << "</#{name}>"
      end
      @open_elements.pop
    end

    # SAX: accumulate text, splitting on whitespace (separators are kept by
    # the capturing split). A trailing hyphen suppresses the lemma boundary
    # until the continuation text arrives; plain whitespace finalizes the
    # current lemma.
    def characters(string)
      @empty_element = false
      string.split(/(\s)/).reject(&:empty?).each do |lemma|
        @current_chars += lemma.strip

        if lemma.match?(/\-$/)
          @inside_hyphen = true
        elsif lemma.match?(/^\-?[^\s]/)
          @inside_hyphen = false
        end

        if lemma.match(/^\s+$/) && !@inside_hyphen
          finalize
          next
        end

        text = lemma.strip
        @current_lemma << text unless text.empty?
      end
    end

    # SAX: flush the last lemma, then drop all working state so only
    # @members survives ("immutable" from the outside).
    def end_document
      finalize
      (instance_variables - [:@members]).each do |var|
        remove_instance_variable(var)
      end
    end

    # Serialize a tag with its attributes (values quote-escaped), optionally
    # in self-closing form.
    def gen_xmlel(name, attrs, self_closing = false)
      attr_list = attrs.reduce("") do |result, attr|
        %(#{result} #{attr[0]}="#{attr[1].gsub('"', '&quot;')}")
      end
      self_closing ? "<#{name}#{attr_list}/>" : "<#{name}#{attr_list}>"
    end

    # Derive a closing tag ("</name>") from an opening-tag string.
    def gen_xmlclose(el)
      el.gsub(/^<([^\s\>]+).*/, '</\\1>')
    end

    # Build a Lemma from the accumulated fragments — but only if the text
    # contains something besides whitespace/hyphens/dots/pipes.
    def append_lemma
      return unless @current_chars.match?(/[^\s\-\.\|]+/) # if not .empty?
      new_lemma_source = @current_lemma.join("")
      new_lemma = Lemma.new(new_lemma_source, @index)
      @index += 1
      @members << new_lemma
    end

    # Close out the current lemma: prepend the applicable page/line breaks,
    # balance any still-open tags, append the lemma, and reset per-lemma
    # state (re-opening unclosed tags for the next lemma).
    def finalize
      return if @current_lemma.empty?
      @current_lemma.unshift(@current_lb.first) unless @current_lemma[0] == @current_lb.first
      @current_lemma.unshift(@current_pb.first) unless @current_lemma[0] == @current_pb.first

      # Make sure missing open or close tags are inserted
      unless @open_elements.empty?
        @current_lemma.concat(@open_elements.reverse.map { |e| gen_xmlclose(e) })
        prime_next = @open_elements.dup
      end

      append_lemma

      # Only the most recent break carries over to the next lemma.
      @current_pb = [@current_pb.last]
      @current_lb = [@current_lb.last]
      @current_chars = ""
      @current_lemma = prime_next || []
      @inside_hyphen = false
    end
  end
end