dphil 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (44) hide show
  1. checksums.yaml +7 -0
  2. data/CODE_OF_CONDUCT.md +49 -0
  3. data/Gemfile +6 -0
  4. data/LICENSE +201 -0
  5. data/README.md +54 -0
  6. data/Rakefile +11 -0
  7. data/dphil.gemspec +49 -0
  8. data/exe/dphil +10 -0
  9. data/lib/dphil.rb +53 -0
  10. data/lib/dphil/cache.rb +15 -0
  11. data/lib/dphil/change_list.rb +6 -0
  12. data/lib/dphil/character.rb +236 -0
  13. data/lib/dphil/character_matrix.rb +102 -0
  14. data/lib/dphil/cli.rb +26 -0
  15. data/lib/dphil/cli_commands/csv2ld.rb +71 -0
  16. data/lib/dphil/cli_commands/csv2nex.rb +37 -0
  17. data/lib/dphil/constants.rb +128 -0
  18. data/lib/dphil/converter.rb +58 -0
  19. data/lib/dphil/converters/csv2nex.rb +83 -0
  20. data/lib/dphil/ld_data_set.rb +25 -0
  21. data/lib/dphil/ld_output.rb +29 -0
  22. data/lib/dphil/lemma.rb +44 -0
  23. data/lib/dphil/lemma_list.rb +179 -0
  24. data/lib/dphil/log_formatter.rb +39 -0
  25. data/lib/dphil/logger.rb +27 -0
  26. data/lib/dphil/metrical_data.rb +78 -0
  27. data/lib/dphil/newick.rb +52 -0
  28. data/lib/dphil/paup.rb +34 -0
  29. data/lib/dphil/refinements.rb +8 -0
  30. data/lib/dphil/refinements/natural_sort.rb +52 -0
  31. data/lib/dphil/script_string.rb +124 -0
  32. data/lib/dphil/syllables.rb +43 -0
  33. data/lib/dphil/syllables/syllable.rb +45 -0
  34. data/lib/dphil/tei_xml.rb +142 -0
  35. data/lib/dphil/transliterate.rb +131 -0
  36. data/lib/dphil/tree.rb +142 -0
  37. data/lib/dphil/tree_node.rb +67 -0
  38. data/lib/dphil/verse.rb +25 -0
  39. data/lib/dphil/verse_analysis.rb +509 -0
  40. data/lib/dphil/verse_analysis_new.rb +816 -0
  41. data/lib/dphil/version.rb +30 -0
  42. data/vendor/default_commands.paup +18 -0
  43. data/vendor/metrical_data.yml +4035 -0
  44. metadata +409 -0
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "awesome_print"
4
+
5
module Dphil
  # Logger formatter that emits an ANSI-colored header line (timestamp,
  # gem version, severity and optional program name) followed by the message
  # on its own line.
  class LogFormatter < ::Logger::Formatter
    using ::Ragabash::Refinements

    # ANSI escape sequences, keyed by color name. +:none+ resets styling.
    COLOR_MAP = {
      none: "\e[0m",
      bold: "\e[1m",
      red: "\e[31m",
      yellow: "\e[33m",
      green: "\e[32m",
      cyan: "\e[36m",
    }.freeze

    # Color name used for each severity label (plus the pseudo-severity
    # "PROGNAME" used for the program-name tag).
    SEVERITY_MAP = {
      "ERROR" => :red,
      "FATAL" => :red,
      "WARN" => :yellow,
      "INFO" => :green,
      "DEBUG" => :cyan,
      "PROGNAME" => :bold,
    }.freeze
    private_constant :COLOR_MAP, :SEVERITY_MAP

    # Wraps +string+ in the escape sequence mapped to +severity+ and a
    # trailing reset. Severities missing from SEVERITY_MAP use +:none+.
    #
    # @param severity [String] severity label such as "INFO"
    # @param string [#to_s] text to wrap
    # @return [String] a new, unfrozen string (callers append to it)
    def colorize(severity, string)
      color_name = SEVERITY_MAP.fetch(severity, :none)
      String.new("#{COLOR_MAP[color_name]}#{string}#{COLOR_MAP[:none]}")
    end

    # Builds the log entry for one message.
    #
    # @param severity [String] severity label
    # @param timestamp [#strftime] time of the log call
    # @param progname [Object, nil] program name; omitted from the header when nil
    # @param msg [Object] string-like messages are printed as-is, anything
    #   else is rendered through +ai+ (awesome_print)
    # @return [String] header line, newline, message, newline
    def call(severity, timestamp, progname, msg)
      stamp = timestamp.strftime('%Y-%m-%d %H:%M:%S %Z')
      header = colorize(severity, "[#{stamp}][v#{VERSION}] [#{severity}] ")
      header << colorize("PROGNAME", "[#{progname}]") unless progname.nil?
      body = msg.respond_to?(:to_str) ? msg : msg.ai(indent: -2)
      "#{header}\n#{body}\n"
    end
  end
end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "active_support/logger"
4
+
5
+ require "dphil/log_formatter"
6
+
7
+ # Namespace module definition
8
module Dphil
  module_function

  # Returns the memoized logger for the library.
  #
  # When Rails is loaded and exposes +Rails.logger+, that logger is used.
  # Otherwise a file logger writing to "dphil.log" under GEM_ROOT is created
  # with a LogFormatter; when Constants::DEBUG is truthy its output is also
  # broadcast to STDERR through a second logger sharing the same formatter.
  #
  # @return [Object] the Rails logger or an ActiveSupport::Logger
  def logger
    @logger ||=
      if defined?(::Rails) && defined?(::Rails.logger)
        ::Rails.logger
      else
        ActiveSupport::Logger.new(File.join(GEM_ROOT, "dphil.log")).tap do |file_log|
          file_log.formatter = LogFormatter.new
          next unless Constants::DEBUG

          # Mirror everything written to the file onto STDERR while debugging.
          console_log = ActiveSupport::Logger.new(STDERR)
          console_log.formatter = file_log.formatter
          file_log.extend(ActiveSupport::Logger.broadcast(console_log))
        end
      end
  end
end
@@ -0,0 +1,78 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "psych"
4
+ require "hashie"
5
+
6
module Dphil
  #
  # Metrical Data structure imported and parsed from "metrical_data" module at:
  # https://github.com/shreevatsa/sanskrit
  #
  # The data is read from vendor/metrical_data.yml when this module is loaded
  # and exposed through the frozen readers +version+, +meters+, +patterns+,
  # +regexes+ and +all+.
  #
  module MetricalData
    using ::Ragabash::Refinements

    class << self
      attr_reader :version, :meters, :patterns, :regexes, :all
    end

    # Reads the vendored YAML file.
    #
    # The "regexes" section holds Regexp keys (the loader calls +source+ on
    # them), which the safe-by-default +Psych.load_file+ of Psych 4 refuses to
    # instantiate. The file ships with the gem, so the unrestricted loader is
    # used when it exists; older Psych versions fall back to +load_file+.
    def self.load_yaml(path)
      return Psych.unsafe_load_file(path) if Psych.respond_to?(:unsafe_load_file)

      Psych.load_file(path)
    end
    private_class_method :load_yaml

    # Returns a copy of +meters+ whose keys (meter names) are downcased.
    def self.downcase_names(meters)
      meters.each_with_object({}) do |(name, value), name_h|
        name_h[Transliterate.unicode_downcase(name)] = value
      end
    end
    private_class_method :downcase_names

    # Rebuilds a { type => { pattern => meters } } section: types become
    # symbols, each pattern key is passed through the given block, meter names
    # are downcased, and entries are ordered by descending key length.
    def self.index_by_pattern(section)
      section.each_with_object({}) do |(type, patterns), type_h|
        by_pattern = patterns.each_with_object({}) do |(pattern, meters), pattern_h|
          pattern_h[yield(pattern)] = downcase_names(meters)
        end
        type_h[type.to_sym] = by_pattern.sort_by { |(k, _)| k.to_s.length }.reverse.to_h
      end
    end
    private_class_method :index_by_pattern

    # This loads and processes the data into the module.
    #
    # NOTE: the original source preceded this method with a bare
    # `private_class_method`, which has no effect without arguments; the
    # method therefore has always been public and is kept public here.
    #
    # @return [Module] self
    def self.load_data!
      yml_data = load_yaml(File.join(GEM_ROOT, "vendor", "metrical_data.yml"))

      @version = yml_data["commit"].deep_freeze

      # Hash of meters with names as keys and patterns as values
      @meters = IHash.new(downcase_names(yml_data["meters"]))

      # Hash of meters with patterns for keys and names/padas as values
      patterns_h = index_by_pattern(yml_data["patterns"]) { |pattern| pattern }
      @patterns = IHashM.new(patterns_h)

      # Hash of meters with regular expressions for keys and names/padas as
      # values. A leading "^" and a trailing "$" are removed from each source.
      regexes_h = index_by_pattern(yml_data["regexes"]) do |pattern|
        Regexp.new(pattern.source.gsub(/^\^|\$$/, ""))
      end
      @regexes = IHashM.new(regexes_h)

      @all = IHashM.new(version: version,
                        meters: meters,
                        patterns: patterns,
                        regexes: regexes)
      self
    end

    # Immutable Hash
    class IHash < ::Hash
      include Hashie::Extensions::MergeInitializer

      # Builds the hash as usual, then deep-freezes it.
      def initialize(*)
        super
        deep_freeze
      end
    end

    # Immutable Hash with method access (for :full, :half, :pada hashes)
    class IHashM < IHash
      include Hashie::Extensions::MethodAccess
    end

    # Load the data when we load the module
    # (but keep it in a method for cleanliness)
    load_data!
  end
end
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "bio"
4
+
5
module Dphil
  module NewickTree
    module_function

    # Builds a Dphil::Tree from the "MajRule" tree stored in a NEXUS file.
    #
    # The first line of the form `tree MajRule = [&R]<newick>` is extracted
    # and parsed with Bio::Newick. Every node except the root receives an ID:
    # the key in +taxa_map+ whose value equals the node name when there is
    # one, otherwise the next unused integer after the largest +taxa_map+ key
    # (starting at 1 when no map is given).
    #
    # @param filename [String] path of the NEXUS file
    # @param tree_id [Object, nil] identifier handed to Dphil::Tree.new
    # @param taxa_map [Hash, nil] taxon ID => taxon name
    # @return [Dphil::Tree] tree whose stats are all nil
    def tree_from_nex(filename, tree_id: nil, taxa_map: nil) # rubocop:disable MethodLength
      newick = File.read(filename).to_s[/^\s*tree MajRule = \[&R\](.*)$/, 1]
      tree = Bio::Newick.new(newick).tree
      next_free_id = (taxa_map&.keys&.max || 0) + 1

      # ID => node table for everything but the root.
      nodes_by_id = {}
      tree.nodes.each do |node|
        next if node == tree.root

        mapped_id = taxa_map&.key(node.name)
        if mapped_id.nil?
          nodes_by_id[next_free_id] = node
          next_free_id += 1
        else
          nodes_by_id[mapped_id] = node
        end
      end

      tree_nodes = nodes_by_id.map { |id, node|
        parent = tree.parent(node)
        description = {
          id: id,
          name: node.name || "##{id}",
          # The root is not in the table, so its children get parent 0.
          parent: nodes_by_id.key(parent) || 0,
          length: tree.get_edge(node, parent)&.distance,
          children: tree.children(node).map { |child| nodes_by_id.key(child) },
        }
        [id, description]
      }.to_h

      stats = %i[length ci hi ci_ex hi_ex ri rc].map { |stat| [stat, nil] }.to_h

      Dphil::Tree.new(tree_id, nodes: tree_nodes, stats: stats)
    end
  end
end
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
module Dphil
  #
  # PAUP* Log Processor
  #
  module PAUP
    # Captures (1) the table under "Branch lengths and linkages" and (2) the
    # statistics paragraph starting at "Tree length =".
    BRANCH_REGEXP = /^Branch lengths and linkages.*?\n\-{40,}\n(.*?)\n\-{40,}\n^Sum.*?(^Tree length =.*?)\n\n/m
    # Captures the table under "Character change lists:".
    CHGLIST_REGEXP = /^Character change lists:.*?\n\-{40,}\n(.*?)\n\n/m

    # Splits a PAUP* log into its preamble and one entry per "Tree N:" section.
    #
    # @param infile [String] path of the log file (expanded before use)
    # @return [Hash] +:preamble+ => text before the first tree, plus one
    #   Integer key per tree mapping to a Hash that holds +:lengths+ and
    #   +:stats+ when the branch table was found and +:changes+ when the
    #   change list was found; +:remainder+ holds a trailing tree number that
    #   has no body
    # @return [Array] an empty array when the file is empty
    # @return [nil] when the file does not exist (a message goes to STDERR)
    def self.parse_trees(infile)
      infile = File.expand_path(infile)
      return STDERR.puts("File #{infile} not found.") unless File.exist?(infile)

      # The capture group makes split keep the tree numbers:
      # [preamble, "1", body1, "2", body2, ...]
      data = File.read(infile).to_s.split(/^Tree ([0-9]+)\:$/)
      return data if data.empty?

      hash = { preamble: data.shift.strip }

      trees = {}
      data.each_slice(2) do |k, v|
        next trees[:remainder] = k if v.nil?

        branches = v.match(BRANCH_REGEXP)&.captures
        changes = v.match(CHGLIST_REGEXP)&.captures
        arr = []
        arr.concat(%i[lengths stats].zip(branches)) unless branches.nil?
        # Guard on the change list itself: the two sections are matched
        # independently, so either one may be missing without the other.
        arr << [:changes, changes[0]] unless changes.nil?
        trees[k.to_i] = arr.to_h
      end

      hash.merge(trees)
    end
  end
end
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "dphil/refinements/natural_sort"
4
+
5
+ module Dphil
6
+ module Refinements # Namespace for Dphil's refinement modules; the natural_sort file is required above.
7
+ end
8
+ end
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
module Dphil
  module Refinements
    # Refinement adding key-sorting helpers to Hash, plus the "natural"
    # comparison they use, in which digit runs compare as integers
    # (so "a2" sorts before "a10").
    module NaturalSort
      refine Hash do
        # Returns a new Hash, sorted recursively by key in natural order.
        # Falls back to +<=>+ on the raw keys when natural comparison
        # yields nothing (nil keys or keys without any letter/digit chunk).
        def natural_sort_keys
          sort_by_key(true) do |a, b|
            NaturalSort.grouped_compare(a, b) || a <=> b
          end
        end

        # Returns a new Hash with keys ordered by +keys.sort+ (using the
        # optional comparison block). With +recursive+, nested Hash values
        # are sorted the same way.
        def sort_by_key(recursive = false, &block)
          keys.sort(&block).each_with_object({}) do |key, acc|
            acc[key] = self[key]
            if recursive && acc[key].is_a?(Hash)
              acc[key] = acc[key].sort_by_key(true, &block)
            end
          end
        end
      end

      class << self
        # Group 1: a run of letters with an optional sigil prefix and
        # trailing punctuation; group 2: a run of digits.
        CMP_REGEX = /((?:@{1,2}|[\$\:])?\p{L}+(?:[^\p{L}\d\s]*))|(\d+)/
        private_constant :CMP_REGEX

        # Compares two strings chunk by chunk, text chunks as strings and
        # digit chunks as integers.
        #
        # @param a [String, nil]
        # @param b [String, nil]
        # @return [Integer] -1, 0 or 1; a digit chunk sorts before a text
        #   chunk, and a string that runs out of chunks first sorts first
        # @return [nil] when either argument is nil or has no chunks
        def grouped_compare(a, b) # rubocop:disable CyclomaticComplexity
          a = a&.scan(CMP_REGEX)
          b = b&.scan(CMP_REGEX)
          # Plain nil/empty checks: `blank?` is not core Ruby and nothing in
          # this file brings it into scope.
          return if a.nil? || a.empty? || b.nil? || b.empty?

          ret = nil
          [a.size, b.size].max.times do |index|
            a_cmp = coerce_chunk(a[index]) || (return -1)
            b_cmp = coerce_chunk(b[index]) || (return 1)
            # Integer <=> String is nil; decide on the coerced chunks (not on
            # the chunk arrays `a`/`b`, which are never Integers).
            ret = (a_cmp <=> b_cmp) || (a_cmp.is_a?(Integer) && -1 || b_cmp.is_a?(Integer) && 1)
            return ret unless ret == 0 # rubocop:disable NumericPredicate
          end
          ret
        end

        private

        # Turns one scan result ([text, digits]) into a String or Integer;
        # nil when the chunk is missing.
        def coerce_chunk(chunk)
          return if chunk.nil?
          return chunk[0] unless chunk[0].nil?
          Integer(chunk[1])
        end
      end
    end
  end
end
@@ -0,0 +1,124 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "forwardable"
4
+
5
module Dphil
  # A String paired with the script (or list of scripts) it is written in.
  # Comparison, conversion and size methods are delegated to the wrapped
  # string; the String-like methods below return ScriptString instances
  # carrying the same script.
  class ScriptString
    using ::Ragabash::Refinements
    extend Forwardable
    def_delegators :@string, :<=>, :==, :===, :to_s, :to_str, :empty?, :length
    attr_reader :string

    # @param str [#to_str] source text; frozen input is duplicated, unfrozen
    #   input is re-encoded to UTF-8 in place and kept as the wrapped string
    # @param script [Symbol, String, Array, nil] script of +str+; detected
    #   with Transliterate.detect when nil
    # @raise [RuntimeError] when +str+ does not respond to +to_str+
    def initialize(str, script = nil)
      raise "Source must be a String" unless str.respond_to?(:to_str)
      str = str.to_str
      str = str.dup if str.frozen?
      @string = str.encode!(Encoding::UTF_8)
      self.script = script || self.script
    end

    # @return [Symbol, Array<Symbol>] the script, detected lazily when unset
    def script
      @script ||= Transliterate.detect(@string)
    end

    # Stores +script+ as a Symbol, or as an Array of Symbols when it
    # responds to +flat_map+.
    def script=(script)
      @script = script.try(:flat_map, &:to_sym) || script.to_sym
    end

    # Returns a new ScriptString transliterated to +target+. When the script
    # is an Array, its first element is replaced by +target+ in the copy.
    def transliterate(target)
      target = target.to_sym
      string = Transliterate.transliterate(@string, from: @script, to: target)
      if @script.is_a?(Array)
        new_target = @script.dup
        new_target[0] = target
        new_target.uniq!
        target = new_target
      end
      self.class.new(string, target)
    end

    # Transliterates the wrapped string to +target+ and records the new
    # script.
    #
    # @return [String] the transliterated string
    def transliterate!(target)
      target = target.to_sym
      @string = Transliterate.transliterate(@string, from: @script, to: target)
      if @script.is_a?(Array)
        @script[0] = target
        @script.uniq!
      else
        # Keep the recorded script in step with the converted text;
        # otherwise the next transliteration would start from the old script.
        @script = target
      end
      @string
    end

    # String methods implemented to return ScriptString instances wherever possible

    def downcase
      self.class.new(Transliterate.unicode_downcase(@string), @script)
    end

    # @return [ScriptString, nil] self, or nil when unicode_downcase! returns nil
    def downcase!
      ret_val = Transliterate.unicode_downcase!(@string)
      self unless ret_val.nil?
    end

    def inspect
      "#{@string.inspect}:#{script}"
    end

    # Like String#gsub. The block is forwarded explicitly: a blockless
    # `Proc.new` no longer captures the caller's block on Ruby 3.
    #
    # @return [ScriptString, Enumerator]
    def gsub(pattern, rep_hash = nil, &block)
      ret_val = if block
                  @string.gsub(pattern, &block)
                elsif !rep_hash.nil?
                  @string.gsub(pattern, rep_hash)
                else
                  @string.gsub(pattern)
                end
      return ret_val if ret_val.is_a?(Enumerator)
      self.class.new(ret_val, @script)
    end

    # Like String#gsub!.
    #
    # @return [ScriptString, Enumerator, nil] self, or nil when nothing was
    #   substituted
    def gsub!(pattern, rep_hash = nil, &block)
      ret_val = if block
                  @string.gsub!(pattern, &block)
                elsif !rep_hash.nil?
                  @string.gsub!(pattern, rep_hash)
                else
                  @string.gsub!(pattern)
                end
      return ret_val if ret_val.is_a?(Enumerator)
      self unless ret_val.nil?
    end

    # Like String#scan. With a block, yields the raw matches and returns
    # self; without one, returns the matches wrapped as ScriptStrings
    # (capture groups that did not participate stay nil).
    def scan(pattern, &block)
      if block
        @string.scan(pattern, &block)
        return self
      end

      @string.scan(pattern).map do |match|
        next self.class.new(match, @script) if match.is_a?(String)
        match.map do |group|
          group && self.class.new(group, @script)
        end
      end
    end

    # Like String#slice.
    #
    # @return [ScriptString, nil] nil when the index or range is out of bounds
    def slice(a, b = nil)
      slice = b.nil? ? @string.slice(a) : @string.slice(a, b)
      slice && self.class.new(slice, @script)
    end
    alias [] slice

    # Like String#slice!: removes the portion from the wrapped string.
    #
    # @return [ScriptString, nil] the removed portion, or nil when out of bounds
    def slice!(a, b = nil)
      slice = b.nil? ? @string.slice!(a) : @string.slice!(a, b)
      slice && self.class.new(slice, @script)
    end

    def strip
      self.class.new(@string.strip, @script)
    end

    # @return [ScriptString, nil] self, or nil when nothing was stripped
    def strip!
      ret_val = @string.strip!
      self unless ret_val.nil?
    end
  end
end
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "forwardable"
4
+
5
+ require "dphil/syllables/syllable"
6
+
7
module Dphil
  # Enumerable collection of the syllables of a source string, together with
  # their weights. Element access and iteration are delegated to the frozen
  # array of Syllable objects.
  class Syllables
    using ::Ragabash::Refinements
    include Enumerable
    extend Forwardable
    def_delegators :@syllables, :[], :each, :first, :last, :length

    attr_reader :source, :source_script, :weights, :syllables

    # @param source [#to_str] text to split into syllables
    # @param source_script [Symbol, nil] script of +source+; when nil it is
    #   detected, falling back to Transliterate.default_script
    def initialize(source, source_script: nil)
      @source = source.to_str.safe_copy.freeze
      @source_script = source_script || Transliterate.detect(@source) || Transliterate.default_script

      # Syllabification and weighting are done on the SLP1 form.
      slp1_syllables = VerseAnalysis.syllables(@source, from: @source_script, to: :slp1)
      @weights = VerseAnalysis.syllables_weights(slp1_syllables, from: :slp1, contextual: true).freeze

      built = slp1_syllables.each_with_index.map do |slp1_text, position|
        # Each syllable is stored in the source script as well as in SLP1.
        display_text = if @source_script == :slp1
                         slp1_text
                       else
                         Transliterate.t(slp1_text, :slp1, @source_script)
                       end
        Syllables::Syllable.new(display_text, @weights[position], parent: self, index: position, slp1: slp1_text)
      end
      @syllables = built.freeze
    end

    def inspect
      %(<Syllables "#{@source}":#{@source_script} (#{@weights}) (#{@syllables.count}) => #{@syllables.inspect}>)
    end

    # @return [Array] each syllable's +source+ passed through
    #   Transliterate.t from :slp1 to the source script
    #
    # NOTE(review): the constructor already hands Syllable a source-script
    # string; if Syllable#source returns that value, it is being treated as
    # SLP1 here — confirm against Syllable.
    def to_a
      @syllables.map do |syllable|
        Transliterate.t(syllable.source, :slp1, @source_script)
      end
    end

    # @return [String] an unfrozen copy of the source text
    def to_s
      @source.dup
    end

    # @return [String] the weights upcased (memoized and frozen)
    def simple_weights
      @simple_weights ||= @weights.upcase.freeze
    end
  end
end