dphil 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CODE_OF_CONDUCT.md +49 -0
- data/Gemfile +6 -0
- data/LICENSE +201 -0
- data/README.md +54 -0
- data/Rakefile +11 -0
- data/dphil.gemspec +49 -0
- data/exe/dphil +10 -0
- data/lib/dphil.rb +53 -0
- data/lib/dphil/cache.rb +15 -0
- data/lib/dphil/change_list.rb +6 -0
- data/lib/dphil/character.rb +236 -0
- data/lib/dphil/character_matrix.rb +102 -0
- data/lib/dphil/cli.rb +26 -0
- data/lib/dphil/cli_commands/csv2ld.rb +71 -0
- data/lib/dphil/cli_commands/csv2nex.rb +37 -0
- data/lib/dphil/constants.rb +128 -0
- data/lib/dphil/converter.rb +58 -0
- data/lib/dphil/converters/csv2nex.rb +83 -0
- data/lib/dphil/ld_data_set.rb +25 -0
- data/lib/dphil/ld_output.rb +29 -0
- data/lib/dphil/lemma.rb +44 -0
- data/lib/dphil/lemma_list.rb +179 -0
- data/lib/dphil/log_formatter.rb +39 -0
- data/lib/dphil/logger.rb +27 -0
- data/lib/dphil/metrical_data.rb +78 -0
- data/lib/dphil/newick.rb +52 -0
- data/lib/dphil/paup.rb +34 -0
- data/lib/dphil/refinements.rb +8 -0
- data/lib/dphil/refinements/natural_sort.rb +52 -0
- data/lib/dphil/script_string.rb +124 -0
- data/lib/dphil/syllables.rb +43 -0
- data/lib/dphil/syllables/syllable.rb +45 -0
- data/lib/dphil/tei_xml.rb +142 -0
- data/lib/dphil/transliterate.rb +131 -0
- data/lib/dphil/tree.rb +142 -0
- data/lib/dphil/tree_node.rb +67 -0
- data/lib/dphil/verse.rb +25 -0
- data/lib/dphil/verse_analysis.rb +509 -0
- data/lib/dphil/verse_analysis_new.rb +816 -0
- data/lib/dphil/version.rb +30 -0
- data/vendor/default_commands.paup +18 -0
- data/vendor/metrical_data.yml +4035 -0
- metadata +409 -0
@@ -0,0 +1,39 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "awesome_print"
|
4
|
+
|
5
|
+
module Dphil
|
6
|
+
# Logger formatter that wraps the entry header (timestamp, gem version,
# severity and optional program name) in ANSI color codes chosen by
# severity, then prints the message on the following line.
class LogFormatter < ::Logger::Formatter
  using ::Ragabash::Refinements

  # ANSI escape sequences keyed by color name; :none resets all attributes.
  COLOR_MAP = {
    none: "\e[0m",
    bold: "\e[1m",
    red: "\e[31m",
    yellow: "\e[33m",
    green: "\e[32m",
    cyan: "\e[36m",
  }.freeze

  # Color assigned to each severity label. "PROGNAME" is a pseudo-severity
  # used by #call to style the program-name tag.
  SEVERITY_MAP = {
    "ERROR" => :red,
    "FATAL" => :red,
    "WARN" => :yellow,
    "INFO" => :green,
    "DEBUG" => :cyan,
    "PROGNAME" => :bold,
  }.freeze

  private_constant :COLOR_MAP, :SEVERITY_MAP

  # Surrounds +string+ with the color code mapped to +severity+ and a
  # trailing reset code. Unknown severities use the reset code as prefix.
  #
  # @return [String] a new, unfrozen string (it is appended to in #call)
  def colorize(severity, string)
    start_code = COLOR_MAP[SEVERITY_MAP.fetch(severity, :none)]
    String.new("#{start_code}#{string}#{COLOR_MAP[:none]}")
  end

  # Builds the full log entry: colored header, newline, message, newline.
  # Messages that do not respond to #to_str are rendered with #ai
  # (awesome_print).
  def call(severity, timestamp, progname, msg)
    stamp = timestamp.strftime('%Y-%m-%d %H:%M:%S %Z')
    header = colorize(severity, "[#{stamp}][v#{VERSION}] [#{severity}] ")
    header << colorize("PROGNAME", "[#{progname}]") unless progname.nil?
    body = msg.respond_to?(:to_str) ? msg : msg.ai(indent: -2)
    "#{header}\n#{body}\n"
  end
end
|
39
|
+
end
|
data/lib/dphil/logger.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "active_support/logger"
|
4
|
+
|
5
|
+
require "dphil/log_formatter"
|
6
|
+
|
7
|
+
# Namespace module definition
|
8
|
+
module Dphil
|
9
|
+
module_function
|
10
|
+
|
11
|
+
# Returns the memoized logger for the gem.
#
# When Rails and Rails.logger are defined, that logger is reused.
# Otherwise a file logger writing to "dphil.log" under GEM_ROOT is created
# with a LogFormatter; if Constants::DEBUG is truthy, entries are also
# broadcast to a second logger on STDERR sharing the same formatter.
def logger
  @logger ||=
    if defined?(::Rails) && defined?(::Rails.logger)
      ::Rails.logger
    else
      ActiveSupport::Logger.new(File.join(GEM_ROOT, "dphil.log")).tap do |primary|
        primary.formatter = LogFormatter.new
        next unless Constants::DEBUG

        console = ActiveSupport::Logger.new(STDERR)
        console.formatter = primary.formatter
        primary.extend(ActiveSupport::Logger.broadcast(console))
      end
    end
end
|
27
|
+
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "psych"
|
4
|
+
require "hashie"
|
5
|
+
|
6
|
+
module Dphil
|
7
|
+
#
|
8
|
+
# Metrical Data structure imported and parsed from "metrical_data" module at:
|
9
|
+
# https://github.com/shreevatsa/sanskrit
|
10
|
+
#
|
11
|
+
module MetricalData
  using ::Ragabash::Refinements

  class << self
    attr_reader :version, :meters, :patterns, :regexes, :all
  end

  # Loads vendor/metrical_data.yml and populates the module-level readers
  # (version, meters, patterns, regexes, all) with deep-frozen structures.
  #
  # NOTE(review): the original preceded this method with a bare
  # `private_class_method` call, which makes nothing private. The method is
  # therefore public and is left public here so existing callers keep
  # working.
  #
  # @return [Module] self
  def self.load_data!
    data_path = File.join(GEM_ROOT, "vendor", "metrical_data.yml")

    # The "regexes" section is keyed by Regexp objects (#source is called on
    # the keys below). Psych 4 made load_file safe-by-default, which rejects
    # Regexp, so use the explicit unsafe loader when it exists. The file is
    # vendored with the gem, not user input.
    yml_data =
      if Psych.respond_to?(:unsafe_load_file)
        Psych.unsafe_load_file(data_path)
      else
        Psych.load_file(data_path)
      end

    @version = yml_data["commit"].deep_freeze

    # Rebuilds a name => value hash with the names passed through
    # Transliterate.unicode_downcase.
    downcase_names = lambda do |named|
      named.each_with_object({}) do |(name, value), acc|
        acc[Transliterate.unicode_downcase(name)] = value
      end
    end

    # Reorders a hash so that keys with the longest #to_s come first.
    longest_key_first = lambda do |table|
      table.sort_by { |(key, _)| key.to_s.length }.reverse.to_h
    end

    # Hash of meters with names as keys and patterns as values
    @meters = IHash.new(downcase_names.call(yml_data["meters"]))

    # Hash of meters with patterns for keys and names/padas as values
    patterns_h = yml_data["patterns"].each_with_object({}) do |(type, patterns), type_h|
      by_pattern = patterns.each_with_object({}) do |(pattern, meters), pattern_h|
        pattern_h[pattern] = downcase_names.call(meters)
      end
      type_h[type.to_sym] = longest_key_first.call(by_pattern)
    end
    @patterns = IHashM.new(patterns_h)

    # Hash of meters with regular expressions for keys and names/padas as values
    regexes_h = yml_data["regexes"].each_with_object({}) do |(type, patterns), type_h|
      by_regex = patterns.each_with_object({}) do |(pattern, meters), pattern_h|
        # Strip a leading "^" and trailing "$" so the regex can match
        # anywhere; the Regexp is rebuilt from its source only.
        new_pattern = Regexp.new(pattern.source.gsub(/^\^|\$$/, ""))
        pattern_h[new_pattern] = downcase_names.call(meters)
      end
      type_h[type.to_sym] = longest_key_first.call(by_regex)
    end
    @regexes = IHashM.new(regexes_h)

    @all = IHashM.new(version: version,
                      meters: meters,
                      patterns: patterns,
                      regexes: regexes)
    self
  end

  # Immutable Hash: populated from the hash given to .new (via Hashie's
  # MergeInitializer) and then deep-frozen.
  class IHash < ::Hash
    include Hashie::Extensions::MergeInitializer

    def initialize(*)
      super
      deep_freeze
    end
  end

  # Immutable Hash with method access (for :full, :half, :pada hashes)
  class IHashM < IHash
    include Hashie::Extensions::MethodAccess
  end

  # Load the data when we load the module
  # (but keep it in a method for cleanliness)
  load_data!
end
|
78
|
+
end
|
data/lib/dphil/newick.rb
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "bio"
|
4
|
+
|
5
|
+
module Dphil
|
6
|
+
module NewickTree
  module_function

  # Builds a Dphil::Tree from the "tree MajRule = [&R]..." line of a NEXUS
  # file.
  #
  # Every non-root node receives an integer id: the key under which its name
  # appears in +taxa_map+ when there is one, otherwise the next unused id
  # counting up from the largest key in +taxa_map+ (or from 1).
  #
  # @param filename [String] path of the file to read
  # @param tree_id passed through as the first argument of Dphil::Tree.new
  # @param taxa_map [Hash, nil] id => taxon name
  # @return [Dphil::Tree]
  def tree_from_nex(filename, tree_id: nil, taxa_map: nil) # rubocop:disable MethodLength
    newick = File.read(filename).to_s[/^\s*tree MajRule = \[&R\](.*)$/, 1]
    tree = Bio::Newick.new(newick).tree
    next_free_id = (taxa_map&.keys&.max || 0) + 1

    # id => node, for every node except the root
    nodes_by_id = {}
    tree.nodes.each do |node|
      next if node == tree.root

      node_id = taxa_map&.key(node.name)
      if node_id.nil?
        node_id = next_free_id
        next_free_id += 1
      end
      nodes_by_id[node_id] = node
    end

    tree_nodes = nodes_by_id.map do |id, node|
      parent = tree.parent(node)
      attributes = {
        id: id,
        name: node.name || "##{id}",
        # The root is not in nodes_by_id, so its children get parent id 0.
        parent: nodes_by_id.key(parent) || 0,
        length: tree.get_edge(node, parent)&.distance,
        children: tree.children(node).map { |child| nodes_by_id.key(child) },
      }
      [id, attributes]
    end.to_h

    # Tree statistics are not available from this input; all are left nil.
    stats = %i[length ci hi ci_ex hi_ex ri rc].each_with_object({}) do |stat, acc|
      acc[stat] = nil
    end

    Dphil::Tree.new(tree_id, nodes: tree_nodes, stats: stats)
  end
end
|
52
|
+
end
|
data/lib/dphil/paup.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Dphil
|
4
|
+
#
|
5
|
+
# PAUP* Log Processor
|
6
|
+
#
|
7
|
+
module PAUP
  # Captures (1) the rows of the "Branch lengths and linkages" table between
  # its two dashed rules and (2) the statistics paragraph that starts with
  # "Tree length =".
  BRANCH_REGEXP = /^Branch lengths and linkages.*?\n\-{40,}\n(.*?)\n\-{40,}\n^Sum.*?(^Tree length =.*?)\n\n/m

  # Captures the rows of the "Character change lists" table that follow its
  # dashed rule, up to the first blank line.
  CHGLIST_REGEXP = /^Character change lists:.*?\n\-{40,}\n(.*?)\n\n/m

  # Splits a PAUP* log on its "Tree N:" headings and extracts the raw text
  # sections of each tree.
  #
  # @param infile [String] path to the log file (expanded before use)
  # @return [Hash] +:preamble+ (text before the first heading) plus one
  #   entry per tree number, each a Hash that may contain +:lengths+,
  #   +:stats+ and +:changes+ depending on which sections were found.
  #   A dangling heading number with no body is stored under +:remainder+.
  # @return [Array] an empty Array when the file is empty
  # @return [nil] when the file does not exist (a message goes to STDERR)
  def self.parse_trees(infile)
    infile = File.expand_path(infile)
    return STDERR.puts("File #{infile} not found.") unless File.exist?(infile)

    # The capture group makes split keep the tree numbers:
    # [preamble, "1", body1, "2", body2, ...]
    data = File.read(infile).to_s.split(/^Tree ([0-9]+)\:$/)
    return data if data.empty?

    hash = { preamble: data.shift.strip }

    trees = {}
    data.each_slice(2) do |k, v|
      next trees[:remainder] = k if v.nil?

      branches = v.match(BRANCH_REGEXP)&.captures
      changes = v.match(CHGLIST_REGEXP)&.captures

      sections = {}
      unless branches.nil?
        sections[:lengths] = branches[0]
        sections[:stats] = branches[1]
      end
      # Guard on the change-list match itself. The original tested
      # `branches` here, which raised when only the branch table was
      # present and dropped the change list when only that was present.
      sections[:changes] = changes[0] unless changes.nil?
      trees[k.to_i] = sections
    end

    hash.merge(trees)
  end
end
|
34
|
+
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Dphil
|
4
|
+
module Refinements
|
5
|
+
# Refinement adding key-sorting helpers to Hash, plus the "natural"
# comparison they use (digit runs compare as numbers, not as text).
module NaturalSort
  refine Hash do
    # Returns a new Hash (nested Hashes included) with keys in natural
    # order. Falls back to plain <=> when the natural comparison cannot
    # decide (it returns nil for nil or token-less keys).
    def natural_sort_keys
      sort_by_key(true) do |a, b|
        NaturalSort.grouped_compare(a, b) || (a <=> b)
      end
    end

    # Returns a new Hash with keys sorted by +block+ (default sort when no
    # block is given). With +recursive+, Hash values are sorted as well,
    # using the same block.
    def sort_by_key(recursive = false, &block)
      keys.sort(&block).each_with_object({}) do |key, acc|
        value = self[key]
        value = value.sort_by_key(true, &block) if recursive && value.is_a?(Hash)
        acc[key] = value
      end
    end
  end

  class << self
    # Tokenizer: group 1 is a run of letters with an optional leading sigil
    # (@, @@, $ or :) and any trailing characters that are not letters,
    # digits or whitespace; group 2 is a run of digits.
    CMP_REGEX = /((?:@{1,2}|[\$\:])?\p{L}+(?:[^\p{L}\d\s]*))|(\d+)/
    private_constant :CMP_REGEX

    # Compares two strings token by token; digit runs are compared as
    # Integers and text runs as Strings. When a number meets text at the
    # same position, the number sorts first. A string that runs out of
    # tokens first sorts first.
    #
    # @return [Integer] -1, 0 or 1
    # @return [nil] when either argument is nil or yields no tokens
    def grouped_compare(a, b) # rubocop:disable CyclomaticComplexity
      a = a&.scan(CMP_REGEX)
      b = b&.scan(CMP_REGEX)
      # Core-Ruby emptiness check; `blank?` is not available without
      # ActiveSupport or a refinement, neither of which this file loads.
      return if a.nil? || a.empty? || b.nil? || b.empty?

      ret = nil
      [a.size, b.size].max.times do |index|
        a_cmp = coerce_chunk(a[index]) || (return -1)
        b_cmp = coerce_chunk(b[index]) || (return 1)
        ret = a_cmp <=> b_cmp
        # <=> is nil only for Integer vs String. Decide on the coerced
        # chunks; the original inspected the token arrays, so this branch
        # never produced a result.
        ret = a_cmp.is_a?(Integer) ? -1 : 1 if ret.nil?
        return ret unless ret.zero?
      end
      ret
    end

    private

    # Converts one scan result ([text, digits]) into a String or Integer;
    # returns nil when the chunk is missing.
    def coerce_chunk(chunk)
      return if chunk.nil?
      return chunk[0] unless chunk[0].nil?

      # Explicit base 10: without it a leading zero means octal, so "08"
      # raises ArgumentError and "010" becomes 8.
      Integer(chunk[1], 10)
    end
  end
end
|
51
|
+
end
|
52
|
+
end
|
@@ -0,0 +1,124 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "forwardable"
|
4
|
+
|
5
|
+
module Dphil
|
6
|
+
# A UTF-8 String paired with the script it is written in. String-like
# methods return ScriptString instances carrying the same script wherever
# possible.
class ScriptString
  using ::Ragabash::Refinements
  extend Forwardable
  def_delegators :@string, :<=>, :==, :===, :to_s, :to_str, :empty?, :length
  attr_reader :string

  # @param str [#to_str] source text
  # @param script [Symbol, Array, nil] script identifier(s); detected from
  #   the text via Transliterate.detect when nil
  # @raise [RuntimeError] when +str+ does not respond to #to_str
  def initialize(str, script = nil)
    raise "Source must be a String" unless str.respond_to?(:to_str)
    str = str.to_str
    # Only frozen input is copied: an unfrozen argument is re-encoded in
    # place and kept as the backing string.
    str = str.dup if str.frozen?
    @string = str.encode!(Encoding::UTF_8)
    self.script = script || self.script
  end

  # Script of the string, detected lazily when not set.
  def script
    @script ||= Transliterate.detect(@string)
  end

  # Stores +script+ as a Symbol, or as a flat Array of Symbols when it
  # responds to #flat_map.
  def script=(script)
    @script = script.try(:flat_map, &:to_sym) || script.to_sym
  end

  # Returns a new ScriptString transliterated to +target+.
  def transliterate(target)
    target = target.to_sym
    string = Transliterate.transliterate(@string, from: @script, to: target)
    self.class.new(string, retargeted_script(target))
  end

  # Transliterates in place and records the new script.
  #
  # @return [String] the new backing string
  def transliterate!(target)
    target = target.to_sym
    @string = Transliterate.transliterate(@string, from: @script, to: target)
    # The original updated @script only when it was an Array, leaving a
    # Symbol script stale after the text had changed script.
    @script = retargeted_script(target)
    @string
  end

  # String methods implemented to return ScriptString instances wherever possible

  def downcase
    self.class.new(Transliterate.unicode_downcase(@string), @script)
  end

  # @return [ScriptString, nil] self, or nil when unicode_downcase! returns nil
  def downcase!
    ret_val = Transliterate.unicode_downcase!(@string)
    self unless ret_val.nil?
  end

  def inspect
    "#{@string.inspect}:#{script}"
  end

  # Like String#gsub. A block takes precedence over +rep_hash+; with
  # neither, the Enumerator from String#gsub is returned as is.
  #
  # The block is taken as an explicit parameter: `Proc.new` without a block
  # raises ArgumentError on Ruby 3.0 and later.
  def gsub(pattern, rep_hash = nil, &block)
    ret_val = if block
                @string.gsub(pattern, &block)
              elsif !rep_hash.nil?
                @string.gsub(pattern, rep_hash)
              else
                @string.gsub(pattern)
              end
    return ret_val if ret_val.is_a?(Enumerator)
    self.class.new(ret_val, @script)
  end

  # In-place variant of #gsub.
  #
  # @return [ScriptString, Enumerator, nil] self, or nil when String#gsub!
  #   made no substitution
  def gsub!(pattern, rep_hash = nil, &block)
    ret_val = if block
                @string.gsub!(pattern, &block)
              elsif !rep_hash.nil?
                @string.gsub!(pattern, rep_hash)
              else
                @string.gsub!(pattern)
              end
    return ret_val if ret_val.is_a?(Enumerator)
    self unless ret_val.nil?
  end

  # Like String#scan, with every match (or capture group) wrapped in a
  # ScriptString. With a block, String#scan returns its receiver and self
  # is returned instead.
  def scan(pattern, &block)
    ret_val = block ? @string.scan(pattern, &block) : @string.scan(pattern)
    return self if ret_val == @string
    ret_val.map do |match|
      next self.class.new(match, @script) if match.is_a?(String)
      # Groups that did not take part in the match are nil; keep them nil
      # instead of failing in the constructor.
      match.map { |group| group && self.class.new(group, @script) }
    end
  end

  # Like String#slice.
  #
  # @return [ScriptString, nil] nil when String#slice returns nil
  def slice(a, b = nil)
    slice = b.nil? ? @string.slice(a) : @string.slice(a, b)
    slice && self.class.new(slice, @script)
  end
  alias [] slice

  # Like String#slice!: removes the selection from the backing string.
  #
  # @return [ScriptString, nil] the removed part, or nil when nothing matched
  def slice!(a, b = nil)
    slice = b.nil? ? @string.slice!(a) : @string.slice!(a, b)
    slice && self.class.new(slice, @script)
  end

  def strip
    self.class.new(@string.strip, @script)
  end

  # @return [ScriptString, nil] self, or nil when nothing was stripped
  def strip!
    ret_val = @string.strip!
    self unless ret_val.nil?
  end

  private

  # Script value after transliterating to +target+: the target itself, or,
  # for an Array script, a copy with its first entry replaced by the target
  # and duplicates removed.
  def retargeted_script(target)
    return target unless @script.is_a?(Array)
    [target, *@script.drop(1)].uniq
  end
end
|
124
|
+
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "forwardable"
|
4
|
+
|
5
|
+
require "dphil/syllables/syllable"
|
6
|
+
|
7
|
+
module Dphil
|
8
|
+
# Enumerable collection of Syllable objects built from a source string,
# together with the weight string computed for those syllables.
class Syllables
  using ::Ragabash::Refinements
  include Enumerable
  extend Forwardable
  def_delegators :@syllables, :[], :each, :first, :last, :length

  attr_reader :source, :source_script, :weights, :syllables

  # Splits +source+ into syllables (worked out in :slp1) and computes their
  # contextual weights.
  #
  # @param source [#to_str] text to syllabify; a frozen copy is stored
  # @param source_script [Symbol, nil] script of +source+; when nil it is
  #   detected, falling back to Transliterate.default_script
  def initialize(source, source_script: nil)
    @source = source.to_str.safe_copy.freeze
    @source_script = source_script || Transliterate.detect(@source) || Transliterate.default_script

    slp1_parts = VerseAnalysis.syllables(@source, from: @source_script, to: :slp1)
    @weights = VerseAnalysis.syllables_weights(slp1_parts, from: :slp1, contextual: true).freeze

    built = slp1_parts.each_with_index.map do |slp1_syl, position|
      # Present each syllable in the source script; skip the round trip
      # when the source is already :slp1.
      display = if @source_script == :slp1
                  slp1_syl
                else
                  Transliterate.t(slp1_syl, :slp1, @source_script)
                end
      Syllables::Syllable.new(display, @weights[position], parent: self, index: position, slp1: slp1_syl)
    end
    @syllables = built.freeze
  end

  def inspect
    %(<Syllables "#{@source}":#{@source_script} (#{@weights}) (#{@syllables.count}) => #{@syllables.inspect}>)
  end

  # Array with each syllable's #source transliterated from :slp1 to the
  # source script.
  # NOTE(review): the first argument given to Syllable.new above is already
  # in the source script; if Syllable#source returns it unchanged, this
  # transliterates a second time - confirm against Syllable.
  def to_a
    @syllables.map do |syllable|
      Transliterate.t(syllable.source, :slp1, @source_script)
    end
  end

  # @return [String] an unfrozen copy of the source text
  def to_s
    @source.dup
  end

  # Weight string upcased; memoized and frozen.
  def simple_weights
    @simple_weights ||= @weights.upcase.freeze
  end
end
|
43
|
+
end
|