hierogloss 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +4 -0
- data/README.md +7 -3
- data/examples/disjunction.md +1 -1
- data/hierogloss.gemspec +1 -0
- data/lib/hierogloss/dictionary.rb +36 -18
- data/lib/hierogloss/gloss.rb +12 -4
- data/lib/hierogloss/mdc.rb +144 -0
- data/lib/hierogloss/metrics/data.rb +1084 -0
- data/lib/hierogloss/metrics.rb +47 -0
- data/lib/hierogloss/version.rb +1 -1
- data/lib/hierogloss.rb +2 -0
- data/src/dump_metrics.rb +45 -0
- data/test/test_dictionary.rb +2 -9
- data/test/test_gloss.rb +4 -2
- data/test/test_mdc.rb +55 -0
- data/test/test_metrics.rb +29 -0
- metadata +24 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e0ddc0d217ce2338569073c4d5da2259a595b62b
|
4
|
+
data.tar.gz: a7f2c0564b348abcc6c864796ad69873896079a4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 740be2d088f038a3a2d5ab40fc898799890ccba066047d293124532a62043372c6eebe28f75f9f23c7ab3d238b498a1c40e3e018ba9cbe27ad3e82a734ef77af
|
7
|
+
data.tar.gz: 0faa6a5486c3b58c9ac54e6c65028534ffe42a9472dbd782452887d525c439dbda57f009bf450d33e445798209d79c689bcd712712fe714062490522983b324c
|
data/Gemfile
CHANGED
@@ -2,3 +2,7 @@ source 'https://rubygems.org'
|
|
2
2
|
|
3
3
|
# Specify your gem's dependencies in hierogloss.gemspec
|
4
4
|
gemspec
|
5
|
+
|
6
|
+
# Temporary until Prawn supports ttfunk 1.1. It's in the current master
|
7
|
+
# branch; it's just not released yet. Enable this to run dump_metrics.rb.
|
8
|
+
gem "ttfunk", git: "https://github.com/prawnpdf/ttfunk.git", ref: "56be4cbb7c72"
|
data/README.md
CHANGED
@@ -2,15 +2,17 @@
|
|
2
2
|
|
3
3
|
**WORK IN PROGRESS. Future releases may change how things work.**
|
4
4
|
|
5
|
-
Hierogloss
|
6
|
-
|
5
|
+
Hierogloss is a set of extensions for the [Kramdown][] gem for people
|
6
|
+
working with hieroglyphs. Hierogloss allows you to mix glossed
|
7
|
+
hieroglyphic texts with Markdown-style formatting. For example, you can
|
8
|
+
write:
|
7
9
|
|
8
10
|
# Disjunction in Middle Egyptian
|
9
11
|
|
10
12
|
This example is based on one in Allen's excellent [Middle Egyptian: An
|
11
13
|
Introduction to the Language and Culture of Hieroglyphs][allen].
|
12
14
|
|
13
|
-
H:
|
15
|
+
H: z:A1*Z1 | π:π*π | π:π€-πͺ:π
±
|
14
16
|
L: s | s.t | r-pw
|
15
17
|
G: man | woman | whichever
|
16
18
|
T: either [a] man or [a] woman
|
@@ -70,3 +72,5 @@ but pass `input: 'hierogloss'` as an argument:
|
|
70
72
|
3. Commit your changes (`git commit -am 'Add some feature'`)
|
71
73
|
4. Push to the branch (`git push origin my-new-feature`)
|
72
74
|
5. Create new Pull Request
|
75
|
+
|
76
|
+
[kramdown]: http://kramdown.gettalong.org/
|
data/examples/disjunction.md
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
This example is based on one in Allen's excellent [Middle Egyptian: An
|
4
4
|
Introduction to the Language and Culture of Hieroglyphs][allen].
|
5
5
|
|
6
|
-
H:
|
6
|
+
H: z:A1*Z1 | π:π*π | π:π€-πͺ:π
±
|
7
7
|
L: s | s.t | r-pw
|
8
8
|
G: man | woman | whichever
|
9
9
|
T: either [a] man or [a] woman
|
data/hierogloss.gemspec
CHANGED
@@ -19,6 +19,7 @@ Gem::Specification.new do |spec|
|
|
19
19
|
spec.require_paths = ["lib"]
|
20
20
|
|
21
21
|
spec.add_dependency "kramdown", "~> 1.3"
|
22
|
+
spec.add_dependency "parslet", "~> 1.4"
|
22
23
|
spec.add_development_dependency "prawn", "~> 0.14.0"
|
23
24
|
spec.add_development_dependency "bundler", "~> 1.3"
|
24
25
|
spec.add_development_dependency "rake"
|
@@ -6,34 +6,52 @@ module Hierogloss
|
|
6
6
|
DATA_DIR = File.join(File.dirname(__FILE__), '..', '..', 'data')
|
7
7
|
MDC_MAPPING_PATH = File.join(DATA_DIR, "Unicode-MdC-Mapping-v1.utf8")
|
8
8
|
|
9
|
-
|
9
|
+
# Lookup tables filled once, at load time, from the mapping file below.

# Unicode sign => Gardiner code (or a composite starting with one).
SIGN_TO_GARDINER = {}
# MdC code => Unicode sign.
MDC_TO_SIGN = {}
# Unicode sign => preferred MdC code.
SIGN_TO_MDC = {}

# Each line is split on tabs into at most four fields; only the first
# (the sign) and the third (space-separated MdC codes) are used here.
File.open(MDC_MAPPING_PATH, "r:bom|utf-8") do |f|
  f.each_line do |line|
    sign, _hex, codes, _remarks = line.chomp.split(/\t/, 4)
    # Blank or short lines have no codes column; skip them instead of
    # crashing on nil.
    next if codes.nil?

    codes.split(/ /).each do |code|
      MDC_TO_SIGN[code] = sign
      # Uniliterals: a one-character code always wins as the MdC form.
      SIGN_TO_MDC[sign] = code if code.length == 1
      # Gardiner codes, and composite signs starting with Gardiner codes.
      # NOTE(review): this pattern accepts one capital letter followed by
      # digits, so codes shaped like "Aa1" or "A14A" would not be treated
      # as Gardiner codes -- confirm that is intended.
      next unless code =~ /\A[A-Z][0-9]+([-:*].*)?\z/
      SIGN_TO_GARDINER[sign] = code
      # Only used when no uniliteral (or earlier Gardiner code) was seen.
      SIGN_TO_MDC[sign] ||= code
    end
  end
end
|
20
|
-
"πΏππππ
±π²ππͺππ
ππππππ‘ππ΄πππ‘πΌππΏπ§π".each_char do |c|
|
21
|
-
GARDINER.delete(c)
|
22
|
-
end
|
23
28
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
29
|
+
class << self
  # Try to kick things into shape for hierogl.ch.
  #
  # Reduces a transliterated word to a headword: parentheses are removed,
  # everything from the first "=" onward is dropped, a trailing ".t" or
  # ".wt" collapses to "t", and any remaining "." and what follows it is
  # dropped.
  #
  # Returns a new String; +word+ itself is left untouched, so frozen
  # strings and strings still in use by the caller are safe to pass in.
  def headword(word)
    word.gsub(/[()]/, '').
      sub(/=.*\z/, '').
      sub(/\.w?t\z/, 't').
      sub(/\..*\z/, '')
  end

  # Given a Unicode hieroglyph, get the corresponding Gardiner sign.
  # Returns nil when the sign has no entry in SIGN_TO_GARDINER.
  def sign_to_gardiner(sign)
    SIGN_TO_GARDINER[sign]
  end

  # Convert a Manuel de Codage transliteration to the corresponding Unicode
  # sign. Returns nil when the code has no entry in MDC_TO_SIGN.
  def mdc_to_sign(mdc)
    MDC_TO_SIGN[mdc]
  end

  # Convert a Unicode hieroglyph to a reasonable MdC representation.
  # Returns nil when the sign has no entry in SIGN_TO_MDC.
  def sign_to_mdc(sign)
    SIGN_TO_MDC[sign]
  end
end
|
38
56
|
end
|
39
57
|
end
|
data/lib/hierogloss/gloss.rb
CHANGED
@@ -7,6 +7,7 @@ module Hierogloss
|
|
7
7
|
#:nodoc:
|
8
8
|
class Row
|
9
9
|
attr_reader :raw_cells
|
10
|
+
alias :cells :raw_cells
|
10
11
|
|
11
12
|
def initialize(row_text)
|
12
13
|
@raw_cells = row_text.split(/\|/).map {|c| c.strip }
|
@@ -29,7 +30,7 @@ module Hierogloss
|
|
29
30
|
def to_kramdown
|
30
31
|
attrs = attributes
|
31
32
|
tr = Kramdown::Element.new(:tr, nil, attrs)
|
32
|
-
|
33
|
+
cells.each do |c|
|
33
34
|
td = Kramdown::Element.new(:td)
|
34
35
|
children = cell_to_kramdown(c)
|
35
36
|
if children.kind_of?(Array)
|
@@ -59,14 +60,21 @@ module Hierogloss
|
|
59
60
|
|
60
61
|
#:nodoc:
|
61
62
|
class HieroglyphRow < Row
|
63
|
+
UNLINKED = {}
|
64
|
+
"πΏππππ
±π²ππͺππ
ππππππ‘ππ΄πππ‘πΌππΏπ§π".each_char {|c| UNLINKED[c] = true }
|
65
|
+
|
62
66
|
def class_attr
|
63
67
|
'hgls-h'
|
64
68
|
end
|
65
69
|
|
70
|
+
# The row's raw cell strings, each run through the MdC parser. The parsed
# list is computed on first use and cached in @cells.
def cells
  @cells ||= raw_cells.map do |raw|
    Hierogloss::MdC.parse(raw)
  end
end
|
73
|
+
|
66
74
|
def cell_to_kramdown(cell)
|
67
|
-
cell.chars.map do |c|
|
68
|
-
gardiner = Dictionary.
|
69
|
-
|
75
|
+
cell.to_linear_hieroglyphs.chars.map do |c|
|
76
|
+
gardiner = Dictionary.sign_to_gardiner(c)
|
77
|
+
unless gardiner.nil? || UNLINKED[c]
|
70
78
|
search_link("Signe:#{gardiner}", c)
|
71
79
|
else
|
72
80
|
Kramdown::Element.new(:text, c)
|
@@ -0,0 +1,144 @@
|
|
1
|
+
require 'parslet'
|
2
|
+
|
3
|
+
module Hierogloss
|
4
|
+
#:nodoc: Our parser for the Manuel de Codage format.
|
5
|
+
module MdC
|
6
|
+
# Common base class for every node of a parsed MdC expression.
class Block
end

# A single sign, named either by an MdC code or by a Unicode hieroglyph.
class Sign < Block
  attr_reader :name

  def initialize(name)
    @name = name
  end

  # The Unicode sign registered for this MdC code, or the name itself
  # when the dictionary has no entry for it.
  def to_unicode
    Hierogloss::Dictionary.mdc_to_sign(name) || name
  end

  # Debug form of a sign: just its name.
  def to_debug
    name
  end

  # A lone sign linearizes to its Unicode form.
  def to_linear_hieroglyphs
    to_unicode
  end

  # MdC text for this sign. +precedence+ is accepted so that every block
  # shares one call signature; a sign never needs it.
  def to_mdc(precedence = 0)
    mdc = Hierogloss::Dictionary.sign_to_mdc(name) || name
    # Wrap composite signs in parens.
    return "(#{mdc})" if mdc =~ /[-:*]/
    mdc
  end
end

# A block made of child blocks. Subclasses decide how the children are
# joined when rendered back to MdC.
class Group < Block
  attr_reader :blocks

  def initialize(blocks)
    @blocks = blocks
  end

  # Array of the children's debug forms.
  def to_debug
    blocks.map {|b| b.to_debug }
  end

  # Array (possibly nested) of the children's linear forms; Quadrats
  # flattens and joins it into a string.
  def to_linear_hieroglyphs
    blocks.map {|b| b.to_linear_hieroglyphs }
  end

  protected

  # This whole precedence business may need more test cases and further
  # work.
  #
  # Wraps +str+ in parentheses when this group's own precedence
  # (+current+) is lower than the precedence of the context it is being
  # rendered in (+context+).
  def maybe_parens(current, context, str)
    if current < context
      "(#{str})"
    else
      str
    end
  end
end

# Signs placed side by side, joined with "*" (precedence 2).
class Sequence < Group
  def to_mdc(precedence = 0)
    maybe_parens(2, precedence, blocks.map {|b| b.to_mdc(2) }.join("*"))
  end
end

# Signs stacked vertically, joined with ":" (precedence 1).
class Stack < Group
  # Same as Group#to_debug, tagged with a leading :stack marker.
  def to_debug
    [:stack].concat(super)
  end

  def to_mdc(precedence = 0)
    maybe_parens(1, precedence, blocks.map {|b| b.to_mdc(1) }.join(":"))
  end
end

# The top-level list of blocks, joined with "-".
class Quadrats < Group
  # Actually render to a string here.
  def to_linear_hieroglyphs
    super.flatten.join
  end

  # MdC text for the whole list. +precedence+ is optional and ignored; it
  # exists only so Quadrats answers the same call as every other block.
  def to_mdc(precedence = 0)
    blocks.map {|b| b.to_mdc(0) }.join("-")
  end
end
|
89
|
+
|
90
|
+
# Parslet grammar for the Manuel de Codage expressions handled here:
# signs, parenthesized groups, "*" juxtaposition and ":" stacking.
class Parser < Parslet::Parser
  # Whitespace and equivalent delimiters: any run of "-", "_" or spaces.
  rule(:space)  { match('[-_ ]').repeat(1) }
  rule(:space?) { space.maybe }

  # Signs: either a run of ASCII letters and digits, or one character in
  # the range U+13000..U+1342F. Trailing delimiters are consumed.
  rule(:alpha_sign)   { match('[A-Za-z0-9]').repeat(1) }
  rule(:unicode_sign) { match('[\u{13000}-\u{1342F}]') }
  rule(:sign) do
    glyph = alpha_sign | unicode_sign
    glyph.as(:sign) >> space?
  end

  # Parenthesized blocks.
  rule(:parens) do
    str('(') >> space? >> sequence >> str(')') >> space?
  end

  # "Terminal" chunks in our expression grammar, which will match
  # an actual, concrete symbol in the first position.
  rule(:atomic) { sign | parens }

  # A list of items with separators between them. The first item is
  # captured as :head and the remaining items as :rest.
  def separated(item, separator)
    tail = (separator >> item).repeat
    item.as(:head) >> tail.as(:rest)
  end

  # Nested lists of signs separated by "*".
  rule(:juxtaposed) { separated(atomic, str('*')).as(:juxtaposed) }

  # Stacks of signs separated by ":".
  rule(:stack) { separated(juxtaposed, str(':')).as(:stack) }

  # The whole input: zero or more stacks in a row.
  rule(:sequence) { stack.repeat }
  root(:sequence)
end
|
121
|
+
|
122
|
+
# Turns the hash/array tree produced by Parser into Sign, Sequence and
# Stack objects.
class Transform < Parslet::Transform
  # If we only have one item, we don't need to build an extra wrapper
  # class; we can just pass it up. Otherwise wrap the list in +klass+.
  def self.lists_as(klass, list)
    return list.first if list.length == 1
    klass.new(list)
  end

  # A separated list becomes a flat array: head first, then the rest.
  rule(head: subtree(:head), rest: sequence(:rest)) { [head] + rest }

  # A matched sign becomes a Sign named by the matched text.
  rule(sign: simple(:sign)) { Sign.new(sign.to_s) }

  # These two blocks take the captures hash as an explicit argument, so
  # +lists_as+ resolves against this class.
  rule(stack: subtree(:list)) {|captures| lists_as(Stack, captures[:list]) }
  rule(juxtaposed: subtree(:list)) {|captures| lists_as(Sequence, captures[:list]) }
end
|
138
|
+
|
139
|
+
# Parse an MdC string and return the result wrapped in a Quadrats.
#
# Errors raised by the Parslet parser for input that does not match the
# grammar are not rescued here.
def self.parse(input)
  tree = Transform.new.apply(Parser.new.parse(input))
  # When the top-level repetition matches nothing (empty input), Parslet
  # appears to hand back an empty string rather than an empty array.
  # Normalize that to an empty list so Quadrats always holds an Array of
  # blocks and its map-based methods keep working.
  blocks = tree.kind_of?(Array) ? tree : []
  Quadrats.new(blocks)
end
|
143
|
+
end
|
144
|
+
end
|