dphil 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. checksums.yaml +7 -0
  2. data/CODE_OF_CONDUCT.md +49 -0
  3. data/Gemfile +6 -0
  4. data/LICENSE +201 -0
  5. data/README.md +54 -0
  6. data/Rakefile +11 -0
  7. data/dphil.gemspec +49 -0
  8. data/exe/dphil +10 -0
  9. data/lib/dphil.rb +53 -0
  10. data/lib/dphil/cache.rb +15 -0
  11. data/lib/dphil/change_list.rb +6 -0
  12. data/lib/dphil/character.rb +236 -0
  13. data/lib/dphil/character_matrix.rb +102 -0
  14. data/lib/dphil/cli.rb +26 -0
  15. data/lib/dphil/cli_commands/csv2ld.rb +71 -0
  16. data/lib/dphil/cli_commands/csv2nex.rb +37 -0
  17. data/lib/dphil/constants.rb +128 -0
  18. data/lib/dphil/converter.rb +58 -0
  19. data/lib/dphil/converters/csv2nex.rb +83 -0
  20. data/lib/dphil/ld_data_set.rb +25 -0
  21. data/lib/dphil/ld_output.rb +29 -0
  22. data/lib/dphil/lemma.rb +44 -0
  23. data/lib/dphil/lemma_list.rb +179 -0
  24. data/lib/dphil/log_formatter.rb +39 -0
  25. data/lib/dphil/logger.rb +27 -0
  26. data/lib/dphil/metrical_data.rb +78 -0
  27. data/lib/dphil/newick.rb +52 -0
  28. data/lib/dphil/paup.rb +34 -0
  29. data/lib/dphil/refinements.rb +8 -0
  30. data/lib/dphil/refinements/natural_sort.rb +52 -0
  31. data/lib/dphil/script_string.rb +124 -0
  32. data/lib/dphil/syllables.rb +43 -0
  33. data/lib/dphil/syllables/syllable.rb +45 -0
  34. data/lib/dphil/tei_xml.rb +142 -0
  35. data/lib/dphil/transliterate.rb +131 -0
  36. data/lib/dphil/tree.rb +142 -0
  37. data/lib/dphil/tree_node.rb +67 -0
  38. data/lib/dphil/verse.rb +25 -0
  39. data/lib/dphil/verse_analysis.rb +509 -0
  40. data/lib/dphil/verse_analysis_new.rb +816 -0
  41. data/lib/dphil/version.rb +30 -0
  42. data/vendor/default_commands.paup +18 -0
  43. data/vendor/metrical_data.yml +4035 -0
  44. metadata +409 -0
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "awesome_print"
4
+
5
module Dphil
  # Logger formatter that writes a colored header line (timestamp, gem
  # version, severity and, when present, the program name) followed by the
  # message on its own line.
  class LogFormatter < ::Logger::Formatter
    using ::Ragabash::Refinements

    # ANSI escape sequences by color name; +:none+ resets the terminal style.
    COLOR_MAP = {
      none: "\e[0m",
      bold: "\e[1m",
      red: "\e[31m",
      yellow: "\e[33m",
      green: "\e[32m",
      cyan: "\e[36m",
    }.freeze

    # Color used for each severity label, plus the pseudo-severity "PROGNAME"
    # that styles the program-name tag.
    SEVERITY_MAP = {
      "ERROR" => :red,
      "FATAL" => :red,
      "WARN" => :yellow,
      "INFO" => :green,
      "DEBUG" => :cyan,
      "PROGNAME" => :bold,
    }.freeze
    private_constant :COLOR_MAP, :SEVERITY_MAP

    # Wraps +string+ in the escape codes mapped to +severity+.
    # Severities without a mapping get the reset code as their prefix.
    #
    # @param severity [String] key into SEVERITY_MAP
    # @param string [#to_s] text to wrap
    # @return [String] a new, unfrozen String (callers append to it)
    def colorize(severity, string)
      color_code = COLOR_MAP[SEVERITY_MAP.fetch(severity, :none)]
      reset_code = COLOR_MAP[:none]
      String.new("#{color_code}#{string}#{reset_code}")
    end

    # Formats one log entry (the Logger::Formatter entry point).
    #
    # @param severity [String] severity label
    # @param timestamp [#strftime] time of the entry
    # @param progname [Object, nil] program name; its tag is omitted when nil
    # @param msg [Object] message; values that do not respond to +to_str+
    #   are rendered with +ai+ (presumably awesome_print, required by this file)
    # @return [String] header line, newline, message, newline
    def call(severity, timestamp, progname, msg)
      stamp = timestamp.strftime("%Y-%m-%d %H:%M:%S %Z")
      header = colorize(severity, "[#{stamp}][v#{VERSION}] [#{severity}] ")
      header << colorize("PROGNAME", "[#{progname}]") unless progname.nil?
      body = msg.respond_to?(:to_str) ? msg : msg.ai(indent: -2)
      "#{header}\n#{body}\n"
    end
  end
end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "active_support/logger"
4
+
5
+ require "dphil/log_formatter"
6
+
7
# Namespace module definition
module Dphil
  module_function

  # Returns the logger used by the library, building it on first use.
  #
  # When Rails and Rails.logger are defined, that logger is used as-is.
  # Otherwise entries go to "dphil.log" under GEM_ROOT, formatted by
  # LogFormatter; when Constants::DEBUG is truthy they are additionally
  # broadcast to STDERR with the same formatter instance.
  #
  # @return [Object] the memoized logger
  def logger
    @logger ||=
      if defined?(::Rails) && defined?(::Rails.logger)
        ::Rails.logger
      else
        log_path = File.join(GEM_ROOT, "dphil.log")
        file_logger = ActiveSupport::Logger.new(log_path)
        file_logger.formatter = LogFormatter.new
        if Constants::DEBUG
          # Mirror everything written to the file onto STDERR.
          stderr_logger = ActiveSupport::Logger.new(STDERR)
          stderr_logger.formatter = file_logger.formatter
          file_logger.extend(ActiveSupport::Logger.broadcast(stderr_logger))
        end
        file_logger
      end
  end
end
@@ -0,0 +1,78 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "psych"
4
+ require "hashie"
5
+
6
module Dphil
  #
  # Metrical Data structure imported and parsed from "metrical_data" module at:
  # https://github.com/shreevatsa/sanskrit
  #
  # The data is loaded once, while this module body is evaluated, and is then
  # available through the module-level readers (+version+, +meters+,
  # +patterns+, +regexes+, +all+).
  #
  module MetricalData
    using ::Ragabash::Refinements

    class << self
      attr_reader :version, :meters, :patterns, :regexes, :all
    end

    # Immutable Hash
    class IHash < ::Hash
      include Hashie::Extensions::MergeInitializer

      # Builds the hash from the given arguments, then deep-freezes it.
      def initialize(*)
        super
        deep_freeze
      end
    end

    # Immutable Hash with method access (for :full, :half, :pada hashes)
    class IHashM < IHash
      include Hashie::Extensions::MethodAccess
    end

    # This loads and processes the data into the module.
    #
    # NOTE: the original source preceded this method with a bare
    # `private_class_method` call, which has no effect without arguments.
    # The method therefore has always been public and is kept public here.
    #
    # @return [Module] self
    def self.load_data!
      path = File.join(GEM_ROOT, "vendor", "metrical_data.yml")

      # The "regexes" section is keyed by Regexp objects (`pattern.source` is
      # called on the keys below). Psych >= 4 makes `load_file` safe by
      # default, which rejects such objects, so use the explicit unsafe loader
      # where it exists. The file is vendored with the gem, not user input.
      yml_data = if Psych.respond_to?(:unsafe_load_file)
                   Psych.unsafe_load_file(path)
                 else
                   Psych.load_file(path)
                 end

      @version = yml_data["commit"].deep_freeze

      # Hash of meters with names as keys and patterns as values
      meters_h = yml_data["meters"].each_with_object({}) do |(name, patterns), h|
        h[Transliterate.unicode_downcase(name)] = patterns
      end
      @meters = IHash.new(meters_h)

      # Hash of meters with patterns for keys and names/padas as values.
      # Within each type the entries are ordered longest pattern first.
      patterns_h = yml_data["patterns"].each_with_object({}) do |(type, patterns), type_h|
        type_h[type.to_sym] = (patterns.each_with_object({}) do |(pattern, meters), pattern_h|
          pattern_h[pattern] = meters.each_with_object({}) do |(name, value), name_h|
            name_h[Transliterate.unicode_downcase(name)] = value
          end
        end).sort_by { |(k, _)| k.to_s.length }.reverse.to_h
      end
      @patterns = IHashM.new(patterns_h)

      # Hash of meters with regular expressions for keys and names/padas as values.
      # Each expression is rebuilt without its leading "^" / trailing "$".
      regexes_h = yml_data["regexes"].each_with_object({}) do |(type, patterns), type_h|
        type_h[type.to_sym] = (patterns.each_with_object({}) do |(pattern, meters), pattern_h|
          new_pattern = Regexp.new(pattern.source.gsub(/^\^|\$$/, ""))
          pattern_h[new_pattern] = meters.each_with_object({}) do |(name, value), name_h|
            name_h[Transliterate.unicode_downcase(name)] = value
          end
        end).sort_by { |(k, _)| k.to_s.length }.reverse.to_h
      end
      @regexes = IHashM.new(regexes_h)

      @all = IHashM.new(version: version,
                        meters: meters,
                        patterns: patterns,
                        regexes: regexes)
      self
    end

    # Load the data when we load the module
    # (but keep it in a method for cleanliness)
    load_data!
  end
end
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "bio"
4
+
5
module Dphil
  # Builds Dphil::Tree objects from the Newick tree stored in a NEXUS file.
  module NewickTree
    module_function

    # Reads the "tree MajRule = [&R]..." line of +filename+, parses it as
    # Newick and converts every non-root node into a plain Hash.
    #
    # Node ids are looked up by node name in +taxa_map+ (id => name); nodes
    # without an entry receive fresh ids counting up from the largest key in
    # +taxa_map+ (or from 1 when there is none).
    #
    # @param filename [String] path of the NEXUS file
    # @param tree_id [Object, nil] id handed to Dphil::Tree.new
    # @param taxa_map [Hash, nil] existing id => taxon-name mapping
    # @return [Dphil::Tree] tree whose stats are all nil
    def tree_from_nex(filename, tree_id: nil, taxa_map: nil) # rubocop:disable MethodLength
      newick = File.read(filename).to_s[/^\s*tree MajRule = \[&R\](.*)$/, 1]
      tree = Bio::Newick.new(newick).tree
      next_free_id = (taxa_map&.keys&.max || 0) + 1

      # id => node, for every node except the root
      nodes_by_id = {}
      tree.nodes.each do |node|
        next if node == tree.root
        id = taxa_map&.key(node.name)
        if id.nil?
          id = next_free_id
          next_free_id += 1
        end
        nodes_by_id[id] = node
      end

      tree_nodes = nodes_by_id.map do |id, node|
        name = node.name || "##{id}"
        parent = tree.parent(node)
        entry = {
          id: id,
          name: name,
          # The root is not in nodes_by_id, so its children get parent 0.
          parent: nodes_by_id.key(parent) || 0,
          length: tree.get_edge(node, parent)&.distance,
          children: tree.children(node).map { |child| nodes_by_id.key(child) },
        }
        [id, entry]
      end.to_h

      # No statistics are available from this source; every slot stays nil.
      stats = %i[length ci hi ci_ex hi_ex ri rc].map { |key| [key, nil] }.to_h

      Dphil::Tree.new(tree_id, nodes: tree_nodes, stats: stats)
    end
  end
end
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
module Dphil
  #
  # PAUP* Log Processor
  #
  module PAUP
    # Captures (1) the branch table between the two dashed rules and
    # (2) the statistics paragraph starting at "Tree length =".
    BRANCH_REGEXP = /^Branch lengths and linkages.*?\n\-{40,}\n(.*?)\n\-{40,}\n^Sum.*?(^Tree length =.*?)\n\n/m
    # Captures the change list that follows the dashed rule.
    CHGLIST_REGEXP = /^Character change lists:.*?\n\-{40,}\n(.*?)\n\n/m

    # Splits a PAUP* log into its per-tree sections.
    #
    # The log is cut at every "Tree N:" line. Text before the first such line
    # is stored (stripped) under +:preamble+; each tree's section is stored
    # under its Integer number as a Hash that may contain +:lengths+,
    # +:stats+ (both from the branch table section) and +:changes+ (from the
    # character change list). A section that matches neither yields +{}+.
    # A trailing tree number without a body is stored under +:remainder+.
    #
    # @param infile [String] path of the log file
    # @return [Hash, Array, nil] the parsed Hash; an empty Array when the file
    #   is empty; nil (after a message on STDERR) when the file does not exist
    def self.parse_trees(infile)
      infile = File.expand_path(infile)
      unless File.exist?(infile)
        STDERR.puts("File #{infile} not found.")
        return
      end

      data = File.read(infile).to_s.split(/^Tree ([0-9]+)\:$/)
      return data if data.empty?

      hash = { preamble: data.shift.strip }

      trees = {}
      # After the preamble, the split yields alternating tree numbers and bodies.
      data.each_slice(2) do |k, v|
        next trees[:remainder] = k if v.nil?
        branches = v.match(BRANCH_REGEXP)&.captures
        changes = v.match(CHGLIST_REGEXP)&.captures
        arr = []
        arr.concat(%i[lengths stats].zip(branches)) unless branches.nil?
        # Guard on the change-list match itself: the two sections can be
        # present independently of each other.
        arr << [:changes, changes[0]] unless changes.nil?
        trees[k.to_i] = arr.to_h
      end

      hash.merge(trees)
    end
  end
end
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "dphil/refinements/natural_sort"
4
+
5
module Dphil
  # Namespace for the refinement modules shipped with the gem; the
  # refinements themselves live in the files required above.
  module Refinements; end
end
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
module Dphil
  module Refinements
    # Natural ("human") ordering of Hash keys: runs of digits are compared
    # as numbers, so "item2" sorts before "item10".
    module NaturalSort
      refine Hash do
        # Returns a copy of the hash (and, recursively, of nested hashes)
        # with keys in natural order. Keys that cannot be compared by chunks
        # fall back to plain <=>.
        def natural_sort_keys
          sort_by_key(true) do |a, b|
            NaturalSort.grouped_compare(a, b) || (a <=> b)
          end
        end

        # Returns a new Hash with keys sorted by +block+ (default key order
        # when no block is given).
        #
        # @param recursive [Boolean] also sort nested Hash values
        # @return [Hash]
        def sort_by_key(recursive = false, &block)
          keys.sort(&block).each_with_object({}) do |key, acc|
            acc[key] = self[key]
            if recursive && acc[key].is_a?(Hash)
              acc[key] = acc[key].sort_by_key(true, &block)
            end
          end
        end
      end

      class << self
        # Chunk 1: letters with an optional sigil prefix and trailing
        # punctuation; chunk 2: a run of digits.
        CMP_REGEX = /((?:@{1,2}|[\$\:])?\p{L}+(?:[^\p{L}\d\s]*))|(\d+)/
        private_constant :CMP_REGEX

        # Compares two strings chunk by chunk.
        #
        # Text chunks compare as Strings and digit runs as base-10 Integers;
        # when a number meets text at the same position the number sorts
        # first, and a string that runs out of chunks sorts first.
        #
        # @param a [#scan, nil]
        # @param b [#scan, nil]
        # @return [Integer, nil] -1, 0 or 1; nil when either side is nil or
        #   contains no comparable chunk
        def grouped_compare(a, b) # rubocop:disable CyclomaticComplexity
          a = a&.scan(CMP_REGEX)
          b = b&.scan(CMP_REGEX)
          # Plain nil/empty checks: this file loads nothing that provides #blank?.
          return if a.nil? || a.empty? || b.nil? || b.empty?

          ret = nil
          [a.size, b.size].max.times do |index|
            a_cmp = coerce_chunk(a[index]) || (return -1)
            b_cmp = coerce_chunk(b[index]) || (return 1)
            # <=> is nil only for Integer vs String; the Integer side wins.
            # (This must test the coerced chunks, not the scan arrays.)
            ret = (a_cmp <=> b_cmp) || (a_cmp.is_a?(Integer) ? -1 : 1)
            return ret unless ret.zero?
          end
          ret
        end

        private

        # Turns one scan result into a String (text chunk) or an Integer
        # (digit chunk); nil when there is no chunk at this position.
        def coerce_chunk(chunk)
          return if chunk.nil?
          return chunk[0] unless chunk[0].nil?
          # Explicit base 10: otherwise "08" raises and "010" parses as octal 8.
          Integer(chunk[1], 10)
        end
      end
    end
  end
end
@@ -0,0 +1,124 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "forwardable"
4
+
5
module Dphil
  # A String wrapper that carries the script (a Symbol, or an Array of
  # Symbols) its text is written in, so that String-like operations can
  # return new ScriptString instances tagged with the same script.
  #
  # NOTE: an unfrozen source String is not copied: it is re-encoded in place
  # and then shared with the wrapper, so in-place methods (+gsub!+, +strip!+,
  # +slice!+, ...) also modify the caller's String.
  class ScriptString
    using ::Ragabash::Refinements
    extend Forwardable
    def_delegators :@string, :<=>, :==, :===, :to_s, :to_str, :empty?, :length
    attr_reader :string

    # @param str [#to_str] the text; frozen input is duplicated first
    # @param script [Symbol, String, Array, nil] script name(s); detected
    #   from the text via Transliterate.detect when omitted
    # @raise [RuntimeError] when +str+ does not respond to +to_str+
    def initialize(str, script = nil)
      raise "Source must be a String" unless str.respond_to?(:to_str)
      str = str.to_str
      str = str.dup if str.frozen?
      @string = str.encode!(Encoding::UTF_8)
      self.script = script || self.script
    end

    # @return [Symbol, Array<Symbol>] the script, detected on first use if unset
    def script
      @script ||= Transliterate.detect(@string)
    end

    # Stores the script as a Symbol, or as an Array of Symbols when given a
    # collection (anything responding to +flat_map+).
    def script=(script)
      @script = script.try(:flat_map, &:to_sym) || script.to_sym
    end

    # Returns a new ScriptString transliterated to +target+. With an Array
    # script, +target+ replaces the first entry (duplicates removed).
    #
    # @param target [#to_sym]
    # @return [ScriptString]
    def transliterate(target)
      target = target.to_sym
      string = Transliterate.transliterate(@string, from: @script, to: target)
      if @script.is_a?(Array)
        new_target = @script.dup
        new_target[0] = target
        new_target.uniq!
        target = new_target
      end
      self.class.new(string, target)
    end

    # In-place variant of #transliterate.
    #
    # @param target [#to_sym]
    # @return [String] the transliterated underlying String
    def transliterate!(target)
      target = target.to_sym
      @string = Transliterate.transliterate(@string, from: @script, to: target)
      if @script.is_a?(Array)
        @script[0] = target
        @script.uniq!
      else
        # Keep the recorded script in step with the text; otherwise a later
        # transliteration would start from the wrong source script.
        @script = target
      end
      @string
    end

    # String methods implemented to return ScriptString instances wherever possible

    # @return [ScriptString] downcased copy
    def downcase
      self.class.new(Transliterate.unicode_downcase(@string), @script)
    end

    # @return [ScriptString, nil] self, or nil when unicode_downcase! returned nil
    def downcase!
      ret_val = Transliterate.unicode_downcase!(@string)
      self unless ret_val.nil?
    end

    def inspect
      "#{@string.inspect}:#{script}"
    end

    # Like String#gsub. The block is taken as an explicit parameter because
    # `Proc.new` without a block raises ArgumentError on Ruby >= 3.0.
    #
    # @return [ScriptString, Enumerator] Enumerator when called with neither
    #   a replacement nor a block
    def gsub(pattern, rep_hash = nil, &block)
      ret_val = if block
                  @string.gsub(pattern, &block)
                elsif !rep_hash.nil?
                  @string.gsub(pattern, rep_hash)
                else
                  @string.gsub(pattern)
                end
      return ret_val if ret_val.is_a?(Enumerator)
      self.class.new(ret_val, @script)
    end

    # In-place variant of #gsub.
    #
    # @return [ScriptString, Enumerator, nil] self; nil when nothing was
    #   substituted; Enumerator when called with neither a replacement nor a block
    def gsub!(pattern, rep_hash = nil, &block)
      ret_val = if block
                  @string.gsub!(pattern, &block)
                elsif !rep_hash.nil?
                  @string.gsub!(pattern, rep_hash)
                else
                  @string.gsub!(pattern)
                end
      return ret_val if ret_val.is_a?(Enumerator)
      self unless ret_val.nil?
    end

    # Like String#scan, with every match (or capture group) wrapped in a
    # ScriptString. Capture groups that did not participate stay nil.
    #
    # @return [ScriptString, Array] self when a block is given, otherwise
    #   the wrapped matches
    def scan(pattern, &block)
      ret_val = block ? @string.scan(pattern, &block) : @string.scan(pattern)
      return self if ret_val == @string
      ret_val.map do |match|
        next self.class.new(match, @script) if match.is_a?(String)
        match.map do |group|
          group.nil? ? nil : self.class.new(group, @script)
        end
      end
    end

    # Like String#slice.
    #
    # @return [ScriptString, nil] nil when the underlying slice is nil
    #   (e.g. out of range)
    def slice(a, b = nil)
      slice = b.nil? ? @string.slice(a) : @string.slice(a, b)
      slice.nil? ? nil : self.class.new(slice, @script)
    end
    alias [] slice

    # Like String#slice!: removes the slice from this string and returns it.
    #
    # @return [ScriptString, nil] nil when the underlying slice is nil
    def slice!(a, b = nil)
      slice = b.nil? ? @string.slice!(a) : @string.slice!(a, b)
      slice.nil? ? nil : self.class.new(slice, @script)
    end

    # @return [ScriptString] copy with surrounding whitespace removed
    def strip
      self.class.new(@string.strip, @script)
    end

    # @return [ScriptString, nil] self, or nil when nothing was stripped
    def strip!
      ret_val = @string.strip!
      self unless ret_val.nil?
    end
  end
end
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "forwardable"
4
+
5
+ require "dphil/syllables/syllable"
6
+
7
module Dphil
  # Immutable, enumerable list of the syllables of a source text, together
  # with the weights computed for them by VerseAnalysis.
  class Syllables
    using ::Ragabash::Refinements
    include Enumerable
    extend Forwardable
    def_delegators :@syllables, :[], :each, :first, :last, :length

    attr_reader :source, :source_script, :weights, :syllables

    # @param source [#to_str] text to split into syllables
    # @param source_script [Symbol, nil] script of +source+; when omitted it
    #   is detected, falling back to Transliterate.default_script
    def initialize(source, source_script: nil)
      @source = source.to_str.safe_copy.freeze
      @source_script = source_script || Transliterate.detect(@source) || Transliterate.default_script

      # Syllabification and weighting are both done on the SLP1 form.
      slp1_syllables = VerseAnalysis.syllables(@source, from: @source_script, to: :slp1)
      @weights = VerseAnalysis.syllables_weights(slp1_syllables, from: :slp1, contextual: true).freeze

      built = slp1_syllables.each_with_index.map do |slp1, position|
        # Each syllable is shown in the script the source was written in.
        text = @source_script == :slp1 ? slp1 : Transliterate.t(slp1, :slp1, @source_script)
        Syllables::Syllable.new(text, @weights[position], parent: self, index: position, slp1: slp1)
      end
      @syllables = built.freeze
    end

    def inspect
      "<Syllables \"#{@source}\":#{@source_script} (#{@weights}) (#{@syllables.count}) => #{@syllables.inspect}>"
    end

    # @return [Array] one transliterated string per syllable
    def to_a
      # NOTE(review): each syllable's +source+ was already converted to
      # @source_script in #initialize, yet it is transliterated "from :slp1"
      # again here -- confirm whether the SLP1 form was meant instead.
      @syllables.map do |syllable|
        Transliterate.t(syllable.source, :slp1, @source_script)
      end
    end

    # @return [String] an unfrozen copy of the source text
    def to_s
      @source.dup
    end

    # @return [String] the weights upcased (memoized, frozen)
    def simple_weights
      @simple_weights ||= @weights.upcase.freeze
    end
  end
end