dphil 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (44)
  1. checksums.yaml +7 -0
  2. data/CODE_OF_CONDUCT.md +49 -0
  3. data/Gemfile +6 -0
  4. data/LICENSE +201 -0
  5. data/README.md +54 -0
  6. data/Rakefile +11 -0
  7. data/dphil.gemspec +49 -0
  8. data/exe/dphil +10 -0
  9. data/lib/dphil.rb +53 -0
  10. data/lib/dphil/cache.rb +15 -0
  11. data/lib/dphil/change_list.rb +6 -0
  12. data/lib/dphil/character.rb +236 -0
  13. data/lib/dphil/character_matrix.rb +102 -0
  14. data/lib/dphil/cli.rb +26 -0
  15. data/lib/dphil/cli_commands/csv2ld.rb +71 -0
  16. data/lib/dphil/cli_commands/csv2nex.rb +37 -0
  17. data/lib/dphil/constants.rb +128 -0
  18. data/lib/dphil/converter.rb +58 -0
  19. data/lib/dphil/converters/csv2nex.rb +83 -0
  20. data/lib/dphil/ld_data_set.rb +25 -0
  21. data/lib/dphil/ld_output.rb +29 -0
  22. data/lib/dphil/lemma.rb +44 -0
  23. data/lib/dphil/lemma_list.rb +179 -0
  24. data/lib/dphil/log_formatter.rb +39 -0
  25. data/lib/dphil/logger.rb +27 -0
  26. data/lib/dphil/metrical_data.rb +78 -0
  27. data/lib/dphil/newick.rb +52 -0
  28. data/lib/dphil/paup.rb +34 -0
  29. data/lib/dphil/refinements.rb +8 -0
  30. data/lib/dphil/refinements/natural_sort.rb +52 -0
  31. data/lib/dphil/script_string.rb +124 -0
  32. data/lib/dphil/syllables.rb +43 -0
  33. data/lib/dphil/syllables/syllable.rb +45 -0
  34. data/lib/dphil/tei_xml.rb +142 -0
  35. data/lib/dphil/transliterate.rb +131 -0
  36. data/lib/dphil/tree.rb +142 -0
  37. data/lib/dphil/tree_node.rb +67 -0
  38. data/lib/dphil/verse.rb +25 -0
  39. data/lib/dphil/verse_analysis.rb +509 -0
  40. data/lib/dphil/verse_analysis_new.rb +816 -0
  41. data/lib/dphil/version.rb +30 -0
  42. data/vendor/default_commands.paup +18 -0
  43. data/vendor/metrical_data.yml +4035 -0
  44. metadata +409 -0
# frozen_string_literal: true

# Registers the `csv2nex` subcommand on the Dphil CLI (GLI-style DSL).
Dphil::CLI.module_eval do
  desc "Convert a CSV-format collation file into a NEXUS file"
  long_desc <<~EOS
    Convert a CSV-format collation file into a NEXUS file for use with PAUP.
    This expects each column of the CSV to represent data for a single taxon,
    and the first row to contain the names of the taxa.
  EOS

  arg :csv_file

  command :csv2nex do |c|
    c.desc "Transpose rows/columns in CSV"
    c.switch :t, :transpose, negatable: false

    c.desc "Include custom PAUP commands from a file in PAUP block of NEXUS output"
    c.flag :d, :paup_data, arg_name: "file"

    c.desc "Write NEXUS output to file instead of STDOUT"
    c.flag :o, :outfile, arg_name: "file"

    # Run the conversion, then either print the NEXUS text or write it to
    # the requested file and report how many bytes were written.
    c.action do |_, copts, args|
      nexus = Dphil::Csv2NexConverter.new(args[0], copts).convert

      if copts[:outfile]
        out_path = Pathname.new(copts[:outfile]).expand_path
        shown_path = out_path.relative_path_from(Pathname.getwd)
        puts "#{File.write(out_path, nexus)} bytes written to #{shown_path}"
        puts "You can process this file using PAUP with the command\n" \
             "`paup4 [options] #{shown_path}`"
      else
        puts nexus
      end
    end
  end
end
# frozen_string_literal: true

require "set"

module Dphil
  # Shared constants: environment detection, SLP1 syllable regexes, and the
  # JSON-LD contexts/types used by the Linked Data output classes.
  module Constants
    using ::Ragabash::Refinements

    # True when running in a development environment: a Rails env beginning
    # with "dev", or a RUBY_ENV beginning with "dev". Strictly true/false.
    DEBUG = !!((defined?(::Rails) && ::Rails.env[/^dev/]) ||
               (ENV["RUBY_ENV"] && ENV["RUBY_ENV"][/^dev/]))

    # Regular expressions for SLP1 syllables
    begin
      vow = "aAiIuUfFxXeEoO"
      con = "kKgGNcCjJYwWqQRtTdDnpPbBmyrlvzSsh"
      add = "MH"

      # One syllable: optional avagraha, leading consonants, a vowel, then
      # trailing consonants/anusvara/visarga not followed by another vowel.
      R_SYL = /[']?[#{con}]*[\s]*[#{vow}][#{con}#{add}]*(?![#{vow}])\s*/
      # "Heavy" syllable marker: long vowel/diphthong, or final M/H.
      R_GSYL = /[AIUFXeEoO]|[MH]$/
      # Consonant cluster at end of string.
      R_CCONF = /[#{con}]{2}$/
      # Consonant cluster anywhere.
      R_CCON = /[#{con}]{2}/
    end

    # Control-word markers used by Transliterate ({#...#} spans and their
    # SHA1-processed placeholders).
    TRANS_CTRL_WORD = /\{#.*?#\}/
    TRANS_CTRL_WORD_CONTENT = /\{#(.*?)#\}/
    TRANS_CTRL_WORD_PROCESSED = /#[a-f0-9]{40}#/

    # Linked Data types and contexts.
    # Each context maps both camelCase and snake_case keys to the same terms
    # so either serialization style expands identically.
    begin
      ld_context_global = {
        "@version" => 1.1,
        "oa" => "http://www.w3.org/ns/oa#",
        "dc" => "http://purl.org/dc/elements/1.1/",
        "xsd" => "http://www.w3.org/2001/XMLSchema#",
        "ubcs" => "http://ld.ubcsanskrit.ca/api#",
        "id" => { "@id" => "dc:identifier" },
      }

      ld_context_character = {
        "states" => { "@id" => "ubcs:charStateBySymbol", "@container" => "@index" },
        "symbols" => { "@id" => "ubcs:charSymbolByState", "@container" => "@index" },
        "stateTotals" => { "@id" => "ubcs:charStateTotalsByState", "@container" => "@index" },
        "state_totals" => { "@id" => "ubcs:charStateTotalsByState", "@container" => "@index" },
        "taxaStates" => { "@id" => "ubcs:charStateByTaxon", "@container" => "@index" },
        "taxa_states" => { "@id" => "ubcs:charStateByTaxon", "@container" => "@index" },
        "statesTaxa" => { "@id" => "ubcs:taxonByCharState", "@container" => "@index" },
        "states_taxa" => { "@id" => "ubcs:taxonByCharState", "@container" => "@index" },
        "isInformative" => { "@id" => "ubcs:charStateIsInformative" },
        "is_informative" => { "@id" => "ubcs:charStateIsInformative" },
        "isConstant" => { "@id" => "ubcs:charStateIsConstant" },
        "is_constant" => { "@id" => "ubcs:charStateIsConstant" },
      }

      ld_context_matrix = {
        "taxaNames" => { "@id" => "dc:identifier", "@container" => "@index" },
        "taxa_names" => { "@id" => "dc:identifier", "@container" => "@index" },
        "characters" => {
          "@id" => "ubcs:phyloCharacter",
          "@container" => "@index",
          "@context" => ld_context_character,
        },
      }

      ld_context_tree_node = {
        "name" => { "@id" => "ubcs:treeNodeName" },
        "length" => { "@id" => "ubcs:branchLength" },
        "parent" => { "@id" => "ubcs:treeNodeParent" },
        "children" => { "@id" => "ubcs:treeNodeChildren" },
      }

      ld_context_tree = {
        "rootId" => { "@id" => "ubcs:treeRootId" },
        "root_id" => { "@id" => "ubcs:treeRootId" },
        "nodes" => {
          "@id" => "ubcs:treeNode",
          "@container" => "@index",
          "@context" => ld_context_tree_node,
        },
        "stats" => {
          "@id" => "ubcs:treeStats",
          "@context" => {
            "ci" => { "@id" => "ubcs:treeCI" },
            "ciEx" => { "@id" => "ubcs:treeCIEx" },
            "ci_ex" => { "@id" => "ubcs:treeCIEx" },
            "hi" => { "@id" => "ubcs:treeHI" },
            "hiEx" => { "@id" => "ubcs:treeHIEx" },
            "hi_ex" => { "@id" => "ubcs:treeHIEx" },
            # NOTE(review): "treeLengh" looks like a typo for "treeLength",
            # but it is an emitted vocabulary term — confirm no consumers
            # depend on the misspelling before correcting it.
            "length" => { "@id" => "ubcs:treeLengh" },
            "rc" => { "@id" => "ubcs:treeRC" },
            "ri" => { "@id" => "ubcs:treeRI" },
          },
        },
      }

      ld_context_dataset = {
        "matrix" => {
          "@id" => "ubcs:characterMatrix",
          "@context" => ld_context_matrix,
        },
        "trees" => {
          "@id" => "ubcs:tree",
          "@container" => "@index",
          "@context" => ld_context_tree,
        },
      }

      # JSON-LD @type value for each serializable class.
      LD_TYPES = {
        "Dphil::Character" => "ubcs:phyloCharacter",
        "Dphil::CharacterMatrix" => "ubcs:characterMatrix",
        "Dphil::TreeNode" => "ubcs:treeNode",
        "Dphil::Tree" => "ubcs:tree",
        "Dphil::LDDataSet" => "ubcs:dataSet",
      }.deep_freeze

      # Full @context (global terms merged with the class-specific context)
      # for each serializable class.
      LD_CONTEXTS = {
        "Dphil::Character" => ld_context_global.merge(ld_context_character),
        "Dphil::CharacterMatrix" => ld_context_global.merge(ld_context_matrix),
        "Dphil::TreeNode" => ld_context_global.merge(ld_context_tree_node),
        "Dphil::Tree" => ld_context_global.merge(ld_context_tree),
        "Dphil::LDDataSet" => ld_context_global.merge(ld_context_dataset),
      }.deep_freeze
    end
  end
end
# frozen_string_literal: true

module Dphil
  #
  # Base module for file converters (CSV, NEXUS, CollateX, etc.)
  #
  module Converter
    private

    # Read an entire file into a String.
    #
    # @param infile [String] path to the file
    # @raise [IOError] if the file does not exist
    def load_file(infile)
      raise IOError, "File #{infile} not found." unless File.exist?(infile)
      File.read(infile)
    end

    # Read a CSV file into an Array of row Arrays.
    #
    # @param mode [String] IO mode string passed through to CSV.read
    #   (e.g. "r:bom|utf-8" for BOM-aware reading)
    # @raise [IOError] if the file does not exist
    def load_csv(infile, mode = "r")
      raise IOError, "File #{infile} not found." unless File.exist?(infile)
      CSV.read(infile, mode)
    end

    # Return a Hash of value => occurrence-count, ordered by descending
    # count; ties keep first-seen order (stabilized via the index).
    def weighted_uniq(array)
      weighted_hash = array.each_with_object({}) do |v, acc|
        acc[v] ||= 0
        acc[v] += 1
      end
      weighted_hash.sort_by.with_index { |(_, count), i| [-count, i] }.to_h
    end

    # Sanitize a character string to basic KH/ASCII: strip surrounding
    # whitespace, transliterate to Kyoto-Harvard, replace internal
    # whitespace with "_" and apostrophes with backticks.
    #
    # FIX: the original stripped *after* replacing whitespace with "_",
    # so the strip was dead code and surrounding spaces leaked into the
    # output as underscores; stripping now happens first.
    def sanitize_char(str)
      str = str.to_s.strip
      src = Sanscript.detect(str) || :iast
      str = Sanscript.transliterate(str, src, :kh)
      str.strip.gsub(/\s/, "_").tr("'", "`")
    end

    # Tokenize the values of a character: map each distinct sanitized
    # reading to [symbol, count], assigning symbols from ALPHABET in
    # descending frequency order. Empty (sanitized) readings are dropped.
    def tokenize(characters)
      char_set = weighted_uniq(characters.map { |c| sanitize_char(c) }.reject(&:empty?))
      char_set.each_with_object({}).with_index do |(char, acc), i|
        acc[char[0]] = [ALPHABET[i], char[1]]
      end
    end

    # NEX Token Alphabet ("A".."Z" then "a".."z"; max 52 states/character).
    ALPHABET = IceNine.deep_freeze(("A".."Z").to_a + ("a".."z").to_a)
    private_constant :ALPHABET
  end
end
# frozen_string_literal: true

module Dphil
  #
  # CSV to NEXUS file converter class
  #
  # Expects each CSV column to hold the readings of one taxon, with the
  # taxa names in the first row (transposable via the :transpose option).
  class Csv2NexConverter
    include Dphil::Converter

    # @param csv_file [String] path to the input CSV file
    # @param opts [Hash] :transpose (swap rows/columns),
    #   :paup_data (path to a PAUP command file), others passed through
    def initialize(csv_file, opts = {})
      opts = opts.to_h

      # Load csv file (BOM-aware mode so spreadsheet exports read cleanly)
      @csv = load_csv(csv_file, "r:bom|utf-8")
      @csv = @csv.transpose if opts[:transpose]

      # Load paup file; default to the gem's vendored PAUP command block
      if opts[:paup_data].nil?
        opts[:paup_data] = File.join(GEM_ROOT, "vendor", "default_commands.paup")
      end
      @paup = load_file(opts[:paup_data])
      # Ensure the block ends with a newline so "END;" lands on its own line
      @paup << "\n" unless @paup.blank? || @paup[-1] == "\n"
      # NOTE(review): String#indent! / #blank? are not core Ruby —
      # presumably ActiveSupport; confirm the dependency is loaded.
      @paup.indent!(2)
      @paup.freeze
    end

    # Perform the conversion and return a string result
    #
    # @return [String] a complete NEXUS document with TAXA, CHARACTERS,
    #   ASSUMPTIONS, and PAUP blocks
    def convert
      # Setup taxa information and orientation (first row = taxa labels,
      # remaining rows = one phylogenetic character each)
      taxa_count = @csv.first.count
      character_count = @csv.count - 1
      taxa_labels = @csv.first.map { |name| name.to_s.strip.scrub.gsub(/[^A-Za-z0-9]/, "_") }

      # Generate labels and matrix: character_matrix accumulates one row of
      # state symbols per taxon, seeded with the taxon label
      character_labels = []
      character_matrix = taxa_labels.map { |t| [t] }
      (1..character_count).each do |r|
        row = @csv[r]
        # sanitized reading => [state symbol, occurrence count]
        token_hash = tokenize(row)
        character_label = (token_hash.map do |k, _|
          "'#{sanitize_char(k)}'"
        end).join(" ")
        character_labels << %(#{r} /#{character_label})
        row.each_with_index do |charstate, i|
          token = token_hash[sanitize_char(charstate)]
          # "-" (gap) for readings whose sanitized form was empty/dropped
          character_matrix[i] << (token.nil? ? "-" : token[0])
        end
      end
      # Collapse each taxon row to "name ABC..." for the MATRIX section
      character_matrix.map! do |arr|
        "#{arr.shift} #{arr.join('')}"
      end

      # Return NEXUS output
      <<~NEXUS_EOF
        #NEXUS

        BEGIN TAXA;
          TITLE Manuscripts;
          DIMENSIONS NTAX=#{taxa_count};
          TAXLABELS #{taxa_labels.join(' ')};
        END;

        BEGIN CHARACTERS;
          TITLE Variant_Matrix;
          DIMENSIONS NCHAR=#{character_count};
          FORMAT DATATYPE = STANDARD RESPECTCASE GAP = - MISSING = ? SYMBOLS = "#{ALPHABET.join(' ')}";
          CHARSTATELABELS #{character_labels.join(', ')};
          MATRIX
            #{character_matrix.join("\n    ")}
          ;

        END;

        BEGIN ASSUMPTIONS;
          OPTIONS DEFTYPE = UNORD;
        END;

        BEGIN PAUP;
        #{@paup}END;
      NEXUS_EOF
    end
  end
end
# frozen_string_literal: true

module Dphil
  # Container pairing a character matrix with its associated trees,
  # serializable as Linked Data via Dphil::LDOutput.
  class LDDataSet
    include Dphil::LDOutput

    attr_reader :matrix, :trees

    # Keyword-only constructor: both parts are required.
    def initialize(matrix:, trees:)
      @matrix = matrix
      @trees = trees
    end

    # Plain-Hash view of the data set.
    def to_h
      { matrix: @matrix, trees: @trees }
    end

    # JSON-ready representation (delegates to the members via Hash#as_json).
    def as_json(options = nil)
      to_h.as_json(options)
    end
  end
end
# frozen_string_literal: true

module Dphil
  #
  # Mixin module for Linked Data output
  #
  # Requires that a class implements +#as_json+
  #
  module LDOutput
    using Dphil::Refinements::NaturalSort

    # Outputs a Linked Data Hash
    #
    # @param options [Hash] :context / :ld_type override the defaults looked
    #   up from Constants by class name; :compact => false returns the
    #   expanded form; remaining options are forwarded to #as_json
    # @return [Hash] compacted JSON-LD (keys natural-sorted), or the
    #   expanded JSON-LD array when :compact => false
    def as_jsonld(**options)
      ld = {
        "@context" => options.delete(:context) || Constants::LD_CONTEXTS[self.class.name],
        "@type" => options.delete(:ld_type) || Constants::LD_TYPES[self.class.name],
      }.merge!(as_json(options))

      ld_expanded = JSON::LD::API.expand(ld)
      return ld_expanded if options[:compact] == false

      ld_compact = JSON::LD::API.compact(ld_expanded, ld["@context"])
      { "@context" => ld_compact.delete("@context") }.merge!(ld_compact.natural_sort_keys)
    end

    # Serializes #as_jsonld to a JSON string.
    #
    # FIX: forward with a double-splat — passing the bare Hash positionally
    # into the **options signature raises ArgumentError under Ruby 3's
    # keyword-argument separation.
    def to_jsonld(**options)
      as_jsonld(**options).to_json(options)
    end
  end
end
# frozen_string_literal: true

module Dphil
  # Public: A storage object for words and groups of words from TEI XML data.
  # Also contains information about the source/location of the words.
  # Immutable.
  class Lemma
    using ::Ragabash::Refinements

    # Public: Returns the raw source data for the lemma.
    attr_reader :source, :text, :page, :facs, :line, :index

    # Public: Initialize a lemma object.
    #
    # source - XML data to initialize the lemma from
    # index  - ordinal position of this lemma within its list
    def initialize(source = "", index = nil)
      @source = source.strip
      @index = index

      xml = Nokogiri::XML("<lemma>#{source}</lemma>") { |config| config.strict.noent }
      xml.encoding = "UTF-8"

      page_breaks = xml.css("pb")
      @text = xml.text.strip.gsub(/\-+\s*\-*/, "")
      @page = page_breaks.map { |el| el.attr("n") }.join(",")
      @facs = page_breaks.map { |el| el.attr("facs") }.join(",")
      @line = xml.css("lb").map { |el| el.attr("n") }.join(",")
    rescue Nokogiri::XML::SyntaxError => e
      # NOTE(review): aborts the whole process on malformed XML; raising an
      # error may suit library use better — confirm before changing.
      $stderr.puts "Error in Lemma.new(`#{source}`, ...): #{e}"
      abort
    end

    # Human-readable "(index|page:line) text" form.
    def to_s
      "(#{index}|#{page}:#{line}) #{text}"
    end

    def to_sym
      "<Lemma>#{self}".to_sym
    end

    # Lemmata are equal when their raw XML sources match.
    def ==(other)
      other.is_a?(Dphil::Lemma) && source == other.source
    end
  end
end
# frozen_string_literal: true

require "nokogiri"

module Dphil
  # An object containing a list of lemmata generated through SAX parsing of an
  # XML document.
  # Immutable.
  class LemmaList < ::Nokogiri::XML::SAX::Document
    using ::Ragabash::Refinements
    include Enumerable

    attr_reader :name

    # Parses +source+ immediately; resulting Lemma objects accumulate in
    # @members. All other instance variables are parser scratch state and
    # are removed again in #end_document.
    def initialize(source)
      @members = []
      source = source.to_s.strip
      return if source.empty?
      # Wrapper/structural tags excluded from lemma source fragments.
      @lemma_ignore_start_tags = Set.new(%w[TEI text body pre post div])
      @lemma_ignore_end_tags = @lemma_ignore_start_tags + Set.new(%w[pb lb])
      @index = 0
      @open_elements = []     # start tags currently open (for re-balancing)
      @current_pb = []        # most recent page-break element(s)
      @current_lb = []        # most recent line-break element(s)
      @current_chars = ""     # bare text accumulated for the lemma in progress
      @current_lemma = []     # XML fragments of the lemma in progress
      @inside_hyphen = false  # true while inside a hyphen-continued word
      @empty_element = true

      @parser = Nokogiri::XML::SAX::Parser.new(self)
      @parser.parse(source)
    end

    def each(&block)
      @members.each(&block)
    end

    # Returns the first +limit+ members, or all members when no limit given.
    def members(limit = nil)
      return @members[0, limit] if limit.is_a? Numeric
      @members
    end

    def [](*args)
      @members[*args]
    end

    # 1-based accessor; indexes below 1 are clamped to 1 with a warning.
    def get(index)
      raise "Non-numeric index passed to Lemma.get" unless index.is_a? Numeric
      if index < 1
        warn "Minimum index of Lemma.get() is 1"
        index = 1
      end
      @members[index - 1]
    end

    def size
      @members.size
    end

    def to_s
      @members.map(&:text).join("\n")
    end

    # Token hashes (text, normalized text, index, page, facsimile, line) —
    # presumably shaped for CollateX input; verify against the consumer.
    def cx_tokens
      @members.map do |lemma|
        out = {
          t: lemma.text,
          n: Transliterate.normalize_iast(lemma.text),
          i: lemma.index,
          p: lemma.page,
          f: lemma.facs,
          l: lemma.line,
        }
        warn "Token empty: #{out}" if out[:t].empty?
        out
      end
    end

    private

    # SAX callback: record a start tag unless ignorable. <pb>/<lb> are
    # self-closing and tracked separately so the current page/line can be
    # prepended to each lemma in #finalize.
    def start_element(name, attrs = [])
      return if @lemma_ignore_start_tags.include?(name)

      if %w[pb lb].include?(name)
        el = gen_xmlel(name, attrs, true)
        if @current_lemma.empty?
          instance_variable_set("@current_#{name}", [el])
        else
          instance_variable_get("@current_#{name}") << el
        end
      else
        el = gen_xmlel(name, attrs)
        # NOTE(review): gen_xmlel is called twice with identical arguments
        # here; `@open_elements << el` would avoid the duplicate work.
        @open_elements << gen_xmlel(name, attrs)
      end

      @empty_element = true
      @current_lemma << el unless el.empty?
    end

    # SAX callback: close a tag, rewriting the previous fragment into a
    # self-closing "<x/>" when nothing was emitted since its start tag.
    def end_element(name)
      return if @lemma_ignore_end_tags.include?(name)

      if @empty_element
        @current_lemma[-1] = @current_lemma[-1].gsub(%r{/*>\z}, "/>")
        @empty_element = false
      else
        @current_lemma << "</#{name}>"
      end
      @open_elements.pop
    end

    # SAX callback: accumulate text. split(/(\s)/) keeps the whitespace
    # separators as tokens; whitespace ends the current lemma unless we are
    # inside a hyphen-continued word.
    def characters(string)
      @empty_element = false
      string.split(/(\s)/).reject(&:empty?).each do |lemma|
        @current_chars += lemma.strip

        if lemma.match?(/\-$/)
          @inside_hyphen = true
        elsif lemma.match?(/^\-?[^\s]/)
          @inside_hyphen = false
        end

        # Whitespace token outside a hyphenation: flush the lemma.
        if lemma.match(/^\s+$/) && !@inside_hyphen
          finalize
          next
        end

        text = lemma.strip
        @current_lemma << text unless text.empty?
      end
    end

    # SAX callback: flush the final lemma and drop all scratch state,
    # leaving only @members behind (hence "Immutable" above).
    def end_document
      finalize
      (instance_variables - [:@members]).each do |var|
        remove_instance_variable(var)
      end
    end

    # Build a start tag (or self-closing tag) string from SAX name/attrs,
    # escaping double quotes in attribute values.
    def gen_xmlel(name, attrs, self_closing = false)
      attr_list = attrs.reduce("") do |result, attr|
        %(#{result} #{attr[0]}="#{attr[1].gsub('"', '&quot;')}")
      end
      self_closing ? "<#{name}#{attr_list}/>" : "<#{name}#{attr_list}>"
    end

    # Derive the matching close tag from a start-tag string.
    def gen_xmlclose(el)
      el.gsub(/^<([^\s\>]+).*/, '</\\1>')
    end

    # Append the in-progress lemma to @members — skipped when the
    # accumulated text holds nothing but whitespace/hyphens/dots/pipes.
    def append_lemma
      return unless @current_chars.match?(/[^\s\-\.\|]+/) # if not .empty?
      new_lemma_source = @current_lemma.join("")
      new_lemma = Lemma.new(new_lemma_source, @index)
      @index += 1
      @members << new_lemma
    end

    # Close out the current lemma: prepend page/line context, balance any
    # still-open tags, store it, and reset scratch state for the next one.
    def finalize
      return if @current_lemma.empty?
      @current_lemma.unshift(@current_lb.first) unless @current_lemma[0] == @current_lb.first
      @current_lemma.unshift(@current_pb.first) unless @current_lemma[0] == @current_pb.first

      # Make sure missing open or close tags are inserted
      unless @open_elements.empty?
        @current_lemma.concat(@open_elements.reverse.map { |e| gen_xmlclose(e) })
        # Carry the open tags over so the next lemma re-opens them.
        prime_next = @open_elements.dup
      end

      append_lemma

      @current_pb = [@current_pb.last]
      @current_lb = [@current_lb.last]
      @current_chars = ""
      @current_lemma = prime_next || []
      @inside_hyphen = false
    end
  end
end