hierogloss 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +4 -0
- data/README.md +7 -3
- data/examples/disjunction.md +1 -1
- data/hierogloss.gemspec +1 -0
- data/lib/hierogloss/dictionary.rb +36 -18
- data/lib/hierogloss/gloss.rb +12 -4
- data/lib/hierogloss/mdc.rb +144 -0
- data/lib/hierogloss/metrics/data.rb +1084 -0
- data/lib/hierogloss/metrics.rb +47 -0
- data/lib/hierogloss/version.rb +1 -1
- data/lib/hierogloss.rb +2 -0
- data/src/dump_metrics.rb +45 -0
- data/test/test_dictionary.rb +2 -9
- data/test/test_gloss.rb +4 -2
- data/test/test_mdc.rb +55 -0
- data/test/test_metrics.rb +29 -0
- metadata +24 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e0ddc0d217ce2338569073c4d5da2259a595b62b
|
4
|
+
data.tar.gz: a7f2c0564b348abcc6c864796ad69873896079a4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 740be2d088f038a3a2d5ab40fc898799890ccba066047d293124532a62043372c6eebe28f75f9f23c7ab3d238b498a1c40e3e018ba9cbe27ad3e82a734ef77af
|
7
|
+
data.tar.gz: 0faa6a5486c3b58c9ac54e6c65028534ffe42a9472dbd782452887d525c439dbda57f009bf450d33e445798209d79c689bcd712712fe714062490522983b324c
|
data/Gemfile
CHANGED
@@ -2,3 +2,7 @@ source 'https://rubygems.org'
|
|
2
2
|
|
3
3
|
# Specify your gem's dependencies in hierogloss.gemspec
|
4
4
|
gemspec
|
5
|
+
|
6
|
+
# Temporary until Prawn supports ttfunk 1.1. It's in the current master
|
7
|
+
# branch; it's just not released yet. Enable this to run dump_metrics.rb.
|
8
|
+
gem "ttfunk", git: "https://github.com/prawnpdf/ttfunk.git", ref: "56be4cbb7c72"
|
data/README.md
CHANGED
@@ -2,15 +2,17 @@
|
|
2
2
|
|
3
3
|
**WORK IN PROGRESS. Future releases may change how things work.**
|
4
4
|
|
5
|
-
Hierogloss
|
6
|
-
|
5
|
+
Hierogloss is a set of extensions for the [Kramdown][] gem for people
|
6
|
+
working with hieroglyphs. Hierogloss allows you to mix glossed
|
7
|
+
hieroglyphic texts with Markdown-style formatting. For example, you can
|
8
|
+
write:
|
7
9
|
|
8
10
|
# Disjunction in Middle Egyptian
|
9
11
|
|
10
12
|
This example is based on one in Allen's excellent [Middle Egyptian: An
|
11
13
|
Introduction to the Language and Culture of Hieroglyphs][allen].
|
12
14
|
|
13
|
-
H:
|
15
|
+
H: z:A1*Z1 | 𓊃:𓏏*𓁐 | 𓂋:𓏤-𓊪:𓅱
|
14
16
|
L: s | s.t | r-pw
|
15
17
|
G: man | woman | whichever
|
16
18
|
T: either [a] man or [a] woman
|
@@ -70,3 +72,5 @@ but pass `input: 'hierogloss'` as an argument:
|
|
70
72
|
3. Commit your changes (`git commit -am 'Add some feature'`)
|
71
73
|
4. Push to the branch (`git push origin my-new-feature`)
|
72
74
|
5. Create new Pull Request
|
75
|
+
|
76
|
+
[kramdown]: http://kramdown.gettalong.org/
|
data/examples/disjunction.md
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
This example is based on one in Allen's excellent [Middle Egyptian: An
|
4
4
|
Introduction to the Language and Culture of Hieroglyphs][allen].
|
5
5
|
|
6
|
-
H:
|
6
|
+
H: z:A1*Z1 | 𓊃:𓏏*𓁐 | 𓂋:𓏤-𓊪:𓅱
|
7
7
|
L: s | s.t | r-pw
|
8
8
|
G: man | woman | whichever
|
9
9
|
T: either [a] man or [a] woman
|
data/hierogloss.gemspec
CHANGED
@@ -19,6 +19,7 @@ Gem::Specification.new do |spec|
|
|
19
19
|
spec.require_paths = ["lib"]
|
20
20
|
|
21
21
|
spec.add_dependency "kramdown", "~> 1.3"
|
22
|
+
spec.add_dependency "parslet", "~> 1.4"
|
22
23
|
spec.add_development_dependency "prawn", "~> 0.14.0"
|
23
24
|
spec.add_development_dependency "bundler", "~> 1.3"
|
24
25
|
spec.add_development_dependency "rake"
|
@@ -6,34 +6,52 @@ module Hierogloss
|
|
6
6
|
DATA_DIR = File.join(File.dirname(__FILE__), '..', '..', 'data')
|
7
7
|
MDC_MAPPING_PATH = File.join(DATA_DIR, "Unicode-MdC-Mapping-v1.utf8")
|
8
8
|
|
9
|
-
|
9
|
+
# Lookup tables filled in from the mapping file below.
SIGN_TO_GARDINER = {}
MDC_TO_SIGN = {}
SIGN_TO_MDC = {}

# Read the mapping file one line at a time. Each line is split on tabs into
# (at most) four fields: sign, hex, codes, remarks. Only the sign and the
# space-separated list of codes are used here.
File.open(MDC_MAPPING_PATH, "r:bom|utf-8") do |mapping|
  mapping.each_line do |line|
    line.chomp!
    sign, _hex, codes, _remarks = line.split(/\t/, 4)
    codes.split(/ /).each do |code|
      # Every code maps back to its sign.
      MDC_TO_SIGN[code] = sign
      # Uniliterals: a one-character code always becomes the sign's MdC form.
      SIGN_TO_MDC[sign] = code if code.length == 1
      # Gardiner codes, and composite signs starting with Gardiner codes.
      next unless code =~ /\A[A-Z][0-9]+([-:*].*)?\z/
      SIGN_TO_GARDINER[sign] = code
      # Only used as the MdC form when nothing was recorded earlier.
      SIGN_TO_MDC[sign] ||= code
    end
  end
end
|
20
|
-
"πΏππππ
±π²ππͺππ
ππππππ‘ππ΄πππ‘πΌππΏπ§π".each_char do |c|
|
21
|
-
GARDINER.delete(c)
|
22
|
-
end
|
23
28
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
29
|
+
class << self
  # Try to kick a transliterated word into shape for hierogl.ch.
  #
  # Works on a copy of +word+, so the caller's string is never modified
  # (and frozen strings are accepted). The steps, in order:
  # 1. remove every "(" and ")" (their contents are kept);
  # 2. drop everything from the first "=" to the end;
  # 3. replace a trailing ".t" or ".wt" with "t";
  # 4. drop everything from the first remaining "." to the end.
  #
  # Returns the resulting headword as a new String.
  def headword(word)
    hw = word.dup
    hw.gsub!(/[()]/, '')
    hw.sub!(/=.*\z/, '')
    hw.sub!(/\.w?t\z/, 't')
    hw.sub!(/\..*\z/, '')
    hw
  end

  # Given a Unicode hieroglyph, get the corresponding Gardiner sign.
  # Returns nil when the sign has no entry in SIGN_TO_GARDINER.
  def sign_to_gardiner(sign)
    SIGN_TO_GARDINER[sign]
  end

  # Convert a Manuel de Codage transliteration to the corresponding Unicode
  # sign. Returns nil when the code has no entry in MDC_TO_SIGN.
  def mdc_to_sign(mdc)
    MDC_TO_SIGN[mdc]
  end

  # Convert a Unicode hieroglyph to a reasonable MdC representation.
  # Returns nil when the sign has no entry in SIGN_TO_MDC.
  def sign_to_mdc(sign)
    SIGN_TO_MDC[sign]
  end
end
|
38
56
|
end
|
39
57
|
end
|
data/lib/hierogloss/gloss.rb
CHANGED
@@ -7,6 +7,7 @@ module Hierogloss
|
|
7
7
|
#:nodoc:
|
8
8
|
class Row
|
9
9
|
attr_reader :raw_cells
|
10
|
+
alias :cells :raw_cells
|
10
11
|
|
11
12
|
def initialize(row_text)
|
12
13
|
@raw_cells = row_text.split(/\|/).map {|c| c.strip }
|
@@ -29,7 +30,7 @@ module Hierogloss
|
|
29
30
|
def to_kramdown
|
30
31
|
attrs = attributes
|
31
32
|
tr = Kramdown::Element.new(:tr, nil, attrs)
|
32
|
-
|
33
|
+
cells.each do |c|
|
33
34
|
td = Kramdown::Element.new(:td)
|
34
35
|
children = cell_to_kramdown(c)
|
35
36
|
if children.kind_of?(Array)
|
@@ -59,14 +60,21 @@ module Hierogloss
|
|
59
60
|
|
60
61
|
#:nodoc:
|
61
62
|
class HieroglyphRow < Row
|
63
|
+
UNLINKED = {}
|
64
|
+
"πΏππππ
±π²ππͺππ
ππππππ‘ππ΄πππ‘πΌππΏπ§π".each_char {|c| UNLINKED[c] = true }
|
65
|
+
|
62
66
|
# Class attribute value for a hieroglyph row: always the string 'hgls-h'.
# NOTE(review): presumably emitted as the row's HTML class — confirm
# against the caller that consumes class_attr.
def class_attr
  'hgls-h'
end
|
65
69
|
|
70
|
+
# The row's cells, each raw cell string run through Hierogloss::MdC.parse.
# The parsed list is computed on first use and cached in @cells.
def cells
  @cells ||= raw_cells.map { |raw| Hierogloss::MdC.parse(raw) }
end
|
73
|
+
|
66
74
|
def cell_to_kramdown(cell)
|
67
|
-
cell.chars.map do |c|
|
68
|
-
gardiner = Dictionary.
|
69
|
-
|
75
|
+
cell.to_linear_hieroglyphs.chars.map do |c|
|
76
|
+
gardiner = Dictionary.sign_to_gardiner(c)
|
77
|
+
unless gardiner.nil? || UNLINKED[c]
|
70
78
|
search_link("Signe:#{gardiner}", c)
|
71
79
|
else
|
72
80
|
Kramdown::Element.new(:text, c)
|
@@ -0,0 +1,144 @@
|
|
1
|
+
require 'parslet'
|
2
|
+
|
3
|
+
module Hierogloss
|
4
|
+
#:nodoc: Our parser for the Manuel de Codage format.
|
5
|
+
module MdC
|
6
|
+
# Common base class for Sign and Group; defines no behavior of its own.
class Block; end
|
8
|
+
|
9
|
+
# A single sign, identified by the name it was written with.
class Sign < Block
  attr_reader :name

  def initialize(name)
    @name = name
  end

  # Look the name up as an MdC code; when the dictionary has no entry,
  # fall back to the name itself.
  def to_unicode
    glyph = Hierogloss::Dictionary.mdc_to_sign(name)
    glyph || name
  end

  # Debug form of a sign is just its name.
  def to_debug
    name
  end

  # A lone sign renders linearly as its Unicode form.
  def to_linear_hieroglyphs
    to_unicode
  end

  # MdC form of this sign. +precedence+ is accepted so signs can be called
  # like groups, but it is not consulted here.
  def to_mdc(precedence)
    code = Hierogloss::Dictionary.sign_to_mdc(name) || name
    # Wrap composite signs in parens.
    code =~ /[-:*]/ ? "(#{code})" : code
  end
end
|
35
|
+
|
36
|
+
# A block made up of an ordered list of child blocks.
class Group < Block
  attr_reader :blocks

  def initialize(blocks)
    @blocks = blocks
  end

  # Array of the children's debug forms.
  def to_debug
    blocks.map(&:to_debug)
  end

  # Array of the children's linear renderings (may be nested).
  def to_linear_hieroglyphs
    blocks.map(&:to_linear_hieroglyphs)
  end

  protected

  # Parenthesize +str+ when this group's precedence (+current+) is lower
  # than that of the surrounding +context+; otherwise return it untouched.
  # This whole precedence business may need more test cases and further work.
  def maybe_parens(current, context, str)
    return str unless current < context

    "(#{str})"
  end
end
|
62
|
+
|
63
|
+
# Children joined with "*" in MdC output, at precedence level 2.
class Sequence < Group
  def to_mdc(precedence)
    joined = blocks.map { |block| block.to_mdc(2) }.join("*")
    maybe_parens(2, precedence, joined)
  end
end
|
68
|
+
|
69
|
+
# Children joined with ":" in MdC output, at precedence level 1.
class Stack < Group
  # Same as the generic group debug form, tagged with a leading :stack.
  def to_debug
    [:stack] + super
  end

  def to_mdc(precedence)
    joined = blocks.map { |block| block.to_mdc(1) }.join(":")
    maybe_parens(1, precedence, joined)
  end
end
|
78
|
+
|
79
|
+
# Top-level list of blocks; the only group that renders to plain strings.
class Quadrats < Group
  # Flatten the nested arrays produced by the children and concatenate
  # everything into one string.
  def to_linear_hieroglyphs
    nested = super
    nested.flatten.join
  end

  # Children rendered at precedence 0 and joined with "-".
  def to_mdc
    blocks.map { |block| block.to_mdc(0) }.join("-")
  end
end
|
89
|
+
|
90
|
+
# Parslet grammar for the subset of Manuel de Codage handled here: signs,
# "*" juxtaposition, ":" stacking, and parenthesized sub-expressions.
# ":" binds more loosely than "*": a stack is a list of juxtaposed items.
class Parser < Parslet::Parser
  # Whitespace and equivalent delimiters: one or more of "-", "_" or " ".
  rule(:space) { match('[-_ ]').repeat(1) }
  rule(:space?) { space.maybe }

  # Signs.
  # An alphanumeric sign is a run of one or more ASCII letters or digits.
  rule(:alpha_sign) { match('[A-Za-z0-9]').repeat(1) }
  # A single character in the range U+13000..U+1342F.
  # NOTE(review): the pattern is single-quoted, so the \u{...} escapes are
  # presumably interpreted by the regexp parslet builds from it — confirm.
  rule(:unicode_sign) { match('[\u{13000}-\u{1342F}]') }
  # Either kind of sign, captured as :sign, then optional delimiters.
  rule(:sign) { (alpha_sign | unicode_sign).as(:sign) >> space? }

  # Parenthesized blocks: "(" sequence ")", with optional delimiters after
  # the opening paren and after the closing paren.
  rule(:parens) { str('(') >> space? >> sequence >> str(')') >> space? }

  # "Terminal" chunks in our expression grammar, which will match
  # an actual, concrete symbol in the first position.
  rule(:atomic) { sign | parens }

  # A list of items with separators between them. The first item is
  # captured as :head and the repeated (separator, item) pairs as :rest.
  def separated(item, separator)
    (item.as(:head) >> (separator >> item).repeat.as(:rest))
  end

  # Nested lists of signs separated by "*".
  rule(:juxtaposed) { separated(atomic, str('*')).as(:juxtaposed) }

  # Stacks of signs separated by ":".
  rule(:stack) { separated(juxtaposed, str(':')).as(:stack) }

  # The root rule: zero or more stacks in a row.
  rule(:sequence) { stack.repeat }
  root(:sequence)
end
|
121
|
+
|
122
|
+
# Turns the tree of :head/:rest, :sign, :stack and :juxtaposed captures
# into Sign, Sequence and Stack objects.
class Transform < Parslet::Transform
  # If we only have one item, we don't need to build an extra wrapper
  # class; we can just pass it up.
  #
  # klass - class to instantiate with the list when it is not a singleton.
  # list  - the items collected for this level.
  #
  # Returns list.first when the list has exactly one element, otherwise
  # klass.new(list).
  def self.lists_as(klass, list)
    if list.length == 1
      list.first
    else
      klass.new(list)
    end
  end

  # A head plus its rest collapse into one flat array, head first.
  rule(head: subtree(:head), rest: sequence(:rest)) { [head].concat(rest) }
  # A captured sign becomes a Sign named by the matched text.
  rule(sign: simple(:sign)) { Sign.new(sign.to_s) }
  # These two blocks take the match dictionary as an argument (d) and hand
  # its :list entry to lists_as.
  rule(stack: subtree(:list)) {|d| lists_as(Stack, d[:list]) }
  rule(juxtaposed: subtree(:list)) {|d| lists_as(Sequence, d[:list]) }
end
|
138
|
+
|
139
|
+
# Parse +input+ with Parser, convert the resulting tree with Transform, and
# wrap the outcome in a Quadrats. Parser errors are not rescued here.
def self.parse(input)
  tree = Parser.new.parse(input)
  blocks = Transform.new.apply(tree)
  Quadrats.new(blocks)
end
|
143
|
+
end
|
144
|
+
end
|