hierogloss 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 95e96759e2925f55b6fbb05fe67628680de08e7a
4
- data.tar.gz: 7ead6a34ffe4281f10ec7cebaa94a14bca713009
3
+ metadata.gz: e0ddc0d217ce2338569073c4d5da2259a595b62b
4
+ data.tar.gz: a7f2c0564b348abcc6c864796ad69873896079a4
5
5
  SHA512:
6
- metadata.gz: 2f6d2686c6ba86ce7cc215652c8f6534fdaa3543a52adb6dee4eecdd32d3fb6b9d446d4ebd408e0bb7aad5faa61043059af9ecfbdc573068ad93cae0a22f74d4
7
- data.tar.gz: 5fbf2d371331598c34df3b5786ef9ba954f5f769d43d54fed97e1dc6be8125a8b20ad4605a498ebcdd8fc5c363254820eb0c7851a97c85a75fc698ae27571844
6
+ metadata.gz: 740be2d088f038a3a2d5ab40fc898799890ccba066047d293124532a62043372c6eebe28f75f9f23c7ab3d238b498a1c40e3e018ba9cbe27ad3e82a734ef77af
7
+ data.tar.gz: 0faa6a5486c3b58c9ac54e6c65028534ffe42a9472dbd782452887d525c439dbda57f009bf450d33e445798209d79c689bcd712712fe714062490522983b324c
data/Gemfile CHANGED
@@ -2,3 +2,7 @@ source 'https://rubygems.org'
2
2
 
3
3
  # Specify your gem's dependencies in hierogloss.gemspec
4
4
  gemspec
5
+
6
+ # Temporary until Prawn supports ttfunk 1.1. It's in the current master
7
+ # branch; it's just not released yet. Enable this to run dump_metrics.rb.
8
+ gem "ttfunk", git: "https://github.com/prawnpdf/ttfunk.git", ref: "56be4cbb7c72"
data/README.md CHANGED
@@ -2,15 +2,17 @@
2
2
 
3
3
  **WORK IN PROGRESS. Future releases may change how things work.**
4
4
 
5
- Hierogloss allows you to mix glossed hieroglyphic texts with Markdown-style
6
- formatting. For example, you can write:
5
+ Hierogloss is a set of extensions for the [Kramdown][] gem for people
6
+ working with hieroglyphs. Hierogloss allows you to mix glossed
7
+ hieroglyphic texts with Markdown-style formatting. For example, you can
8
+ write:
7
9
 
8
10
  # Disjunction in Middle Egyptian
9
11
 
10
12
  This example is based on one in Allen's excellent [Middle Egyptian: An
11
13
  Introduction to the Language and Culture of Hieroglyphs][allen].
12
14
 
13
- H: 𓊃𓀀𓏤 | 𓊃𓏏𓁐 | 𓂋𓏤𓊪𓅱
15
+ H: z:A1*Z1 | 𓊃:𓏏*𓁐 | 𓂋:𓏤-𓊪:𓅱
14
16
  L: s | s.t | r-pw
15
17
  G: man | woman | whichever
16
18
  T: either [a] man or [a] woman
@@ -70,3 +72,5 @@ but pass `input: 'hierogloss'` as an argument:
70
72
  3. Commit your changes (`git commit -am 'Add some feature'`)
71
73
  4. Push to the branch (`git push origin my-new-feature`)
72
74
  5. Create new Pull Request
75
+
76
+ [kramdown]: http://kramdown.gettalong.org/
@@ -3,7 +3,7 @@
3
3
  This example is based on one in Allen's excellent [Middle Egyptian: An
4
4
  Introduction to the Language and Culture of Hieroglyphs][allen].
5
5
 
6
- H: 𓊃𓀀𓏤 | 𓊃𓏏𓁐 | 𓂋𓏤𓊪𓅱
6
+ H: z:A1*Z1 | 𓊃:𓏏*𓁐 | 𓂋:𓏤-𓊪:𓅱
7
7
  L: s | s.t | r-pw
8
8
  G: man | woman | whichever
9
9
  T: either [a] man or [a] woman
data/hierogloss.gemspec CHANGED
@@ -19,6 +19,7 @@ Gem::Specification.new do |spec|
19
19
  spec.require_paths = ["lib"]
20
20
 
21
21
  spec.add_dependency "kramdown", "~> 1.3"
22
+ spec.add_dependency "parslet", "~> 1.4"
22
23
  spec.add_development_dependency "prawn", "~> 0.14.0"
23
24
  spec.add_development_dependency "bundler", "~> 1.3"
24
25
  spec.add_development_dependency "rake"
@@ -6,34 +6,52 @@ module Hierogloss
6
6
  DATA_DIR = File.join(File.dirname(__FILE__), '..', '..', 'data')
7
7
  MDC_MAPPING_PATH = File.join(DATA_DIR, "Unicode-MdC-Mapping-v1.utf8")
8
8
 
9
- GARDINER = {}
9
+ SIGN_TO_GARDINER = {}
10
+ MDC_TO_SIGN = {}
11
+ SIGN_TO_MDC = {}
12
+
10
13
  File.open(MDC_MAPPING_PATH, "r:bom|utf-8") do |f|
11
14
  f.each_line do |l|
12
15
  l.chomp!
13
16
  sign, hex, codes, remarks = l.split(/\t/, 4)
14
17
  for code in codes.split(/ /)
15
- next unless code =~ /\A[A-Z][0-9]+\z/
16
- GARDINER[sign] = code
18
+ MDC_TO_SIGN[code] = sign
19
+ # Uniliterals.
20
+ SIGN_TO_MDC[sign] = code if code.length == 1
21
+ # Gardiner codes, and composite signs starting with Gardiner codes.
22
+ next unless code =~ /\A[A-Z][0-9]+([-:*].*)?\z/
23
+ SIGN_TO_GARDINER[sign] = code
24
+ SIGN_TO_MDC[sign] ||= code
17
25
  end
18
26
  end
19
27
  end
20
- "𓄿𓇋𓏭𓂝𓅱𓏲𓃀𓊪𓆑𓅓𓈖𓂋𓉔𓎛𓐍𓄡𓊃𓋴𓈙𓈎𓎡𓎼𓏏𓍿𓂧𓆓".each_char do |c|
21
- GARDINER.delete(c)
22
- end
23
28
 
24
- # Try to kick things into shape for hierogl.ch.
25
- def self.headword(word)
26
- hw = word
27
- hw.gsub!(/[()]/, '')
28
- hw.sub!(/=.*\z/, '')
29
- hw.sub!(/\.w?t\z/, 't')
30
- hw.sub!(/\..*\z/, '')
31
- hw
32
- end
29
+ class << self
30
+ # Try to kick things into shape for hierogl.ch.
31
+ def headword(word)
32
+ hw = word
33
+ hw.gsub!(/[()]/, '')
34
+ hw.sub!(/=.*\z/, '')
35
+ hw.sub!(/\.w?t\z/, 't')
36
+ hw.sub!(/\..*\z/, '')
37
+ hw
38
+ end
33
39
 
34
- # Given a Unicode hieroglyph, get the corresponding Gardiner sign.
35
- def self.gardiner(sign)
36
- GARDINER[sign]
40
+ # Given a Unicode hieroglyph, get the corresponding Gardiner sign.
41
+ def sign_to_gardiner(sign)
42
+ SIGN_TO_GARDINER[sign]
43
+ end
44
+
45
+ # Convert a Manuel de Codage transliteration to the corresponding Unicode
46
+ # sign.
47
+ def mdc_to_sign(mdc)
48
+ MDC_TO_SIGN[mdc]
49
+ end
50
+
51
+ # Convert a Unicode hieroglyph to a reasonable MdC representation.
52
+ def sign_to_mdc(sign)
53
+ SIGN_TO_MDC[sign]
54
+ end
37
55
  end
38
56
  end
39
57
  end
@@ -7,6 +7,7 @@ module Hierogloss
7
7
  #:nodoc:
8
8
  class Row
9
9
  attr_reader :raw_cells
10
+ alias :cells :raw_cells
10
11
 
11
12
  def initialize(row_text)
12
13
  @raw_cells = row_text.split(/\|/).map {|c| c.strip }
@@ -29,7 +30,7 @@ module Hierogloss
29
30
  def to_kramdown
30
31
  attrs = attributes
31
32
  tr = Kramdown::Element.new(:tr, nil, attrs)
32
- raw_cells.each do |c|
33
+ cells.each do |c|
33
34
  td = Kramdown::Element.new(:td)
34
35
  children = cell_to_kramdown(c)
35
36
  if children.kind_of?(Array)
@@ -59,14 +60,21 @@ module Hierogloss
59
60
 
60
61
  #:nodoc:
61
62
  class HieroglyphRow < Row
63
+ UNLINKED = {}
64
+ "𓄿𓇋𓏭𓂝𓅱𓏲𓃀𓊪𓆑𓅓𓈖𓂋𓉔𓎛𓐍𓄡𓊃𓋴𓈙𓈎𓎡𓎼𓏏𓍿𓂧𓆓".each_char {|c| UNLINKED[c] = true }
65
+
62
66
  def class_attr
63
67
  'hgls-h'
64
68
  end
65
69
 
70
+ def cells
71
+ @cells ||= raw_cells.map {|c| Hierogloss::MdC.parse(c) }
72
+ end
73
+
66
74
  def cell_to_kramdown(cell)
67
- cell.chars.map do |c|
68
- gardiner = Dictionary.gardiner(c)
69
- if !gardiner.nil?
75
+ cell.to_linear_hieroglyphs.chars.map do |c|
76
+ gardiner = Dictionary.sign_to_gardiner(c)
77
+ unless gardiner.nil? || UNLINKED[c]
70
78
  search_link("Signe:#{gardiner}", c)
71
79
  else
72
80
  Kramdown::Element.new(:text, c)
@@ -0,0 +1,144 @@
1
+ require 'parslet'
2
+
3
+ module Hierogloss
4
+ #:nodoc: Our parser for the Manuel de Codage format.
5
+ module MdC
6
+ class Block
7
+ end
8
+
9
+ class Sign < Block
10
+ attr_reader :name
11
+
12
+ def initialize(name)
13
+ @name = name
14
+ end
15
+
16
+ def to_unicode
17
+ Hierogloss::Dictionary.mdc_to_sign(name) || name
18
+ end
19
+
20
+ def to_debug
21
+ name
22
+ end
23
+
24
+ def to_linear_hieroglyphs
25
+ to_unicode
26
+ end
27
+
28
+ def to_mdc(precedence)
29
+ mdc = Hierogloss::Dictionary.sign_to_mdc(name) || name
30
+ # Wrap composite signs in parens.
31
+ return "(#{mdc})" if mdc =~ /[-:*]/
32
+ mdc
33
+ end
34
+ end
35
+
36
+ class Group < Block
37
+ attr_reader :blocks
38
+
39
+ def initialize(blocks)
40
+ @blocks = blocks
41
+ end
42
+
43
+ def to_debug
44
+ blocks.map {|b| b.to_debug }
45
+ end
46
+
47
+ def to_linear_hieroglyphs
48
+ blocks.map {|b| b.to_linear_hieroglyphs }
49
+ end
50
+
51
+ protected
52
+
53
+ # This whole precedence business may need more test cases and further work.
54
+ def maybe_parens(current, context, str)
55
+ if current < context
56
+ "(#{str})"
57
+ else
58
+ str
59
+ end
60
+ end
61
+ end
62
+
63
+ class Sequence < Group
64
+ def to_mdc(precedence)
65
+ maybe_parens(2, precedence, blocks.map {|b| b.to_mdc(2) }.join("*"))
66
+ end
67
+ end
68
+
69
+ class Stack < Group
70
+ def to_debug
71
+ [:stack].concat(super)
72
+ end
73
+
74
+ def to_mdc(precedence)
75
+ maybe_parens(1, precedence, blocks.map {|b| b.to_mdc(1) }.join(":"))
76
+ end
77
+ end
78
+
79
+ class Quadrats < Group
80
+ # Actually render to a string here.
81
+ def to_linear_hieroglyphs
82
+ super.flatten.join
83
+ end
84
+
85
+ def to_mdc
86
+ blocks.map {|b| b.to_mdc(0) }.join("-")
87
+ end
88
+ end
89
+
90
+ class Parser < Parslet::Parser
91
+ # Whitespace and equivalent delimiters.
92
+ rule(:space) { match('[-_ ]').repeat(1) }
93
+ rule(:space?) { space.maybe }
94
+
95
+ # Signs.
96
+ rule(:alpha_sign) { match('[A-Za-z0-9]').repeat(1) }
97
+ rule(:unicode_sign) { match('[\u{13000}-\u{1342F}]') }
98
+ rule(:sign) { (alpha_sign | unicode_sign).as(:sign) >> space? }
99
+
100
+ # Parenthesized blocks.
101
+ rule(:parens) { str('(') >> space? >> sequence >> str(')') >> space? }
102
+
103
+ # "Terminal" chunks in our expression grammar, which will match
104
+ # an actual, concrete symbol in the first position.
105
+ rule(:atomic) { sign | parens }
106
+
107
+ # A list of items with separators between them.
108
+ def separated(item, separator)
109
+ (item.as(:head) >> (separator >> item).repeat.as(:rest))
110
+ end
111
+
112
+ # Nested lists of signs separated by "*".
113
+ rule(:juxtaposed) { separated(atomic, str('*')).as(:juxtaposed) }
114
+
115
+ # Stacks of signs separated by ":".
116
+ rule(:stack) { separated(juxtaposed, str(':')).as(:stack) }
117
+
118
+ rule(:sequence) { stack.repeat }
119
+ root(:sequence)
120
+ end
121
+
122
+ class Transform < Parslet::Transform
123
+ # If we only have one item, we don't need to build an extra wrapper
124
+ # class; we can just pass it up.
125
+ def self.lists_as(klass, list)
126
+ if list.length == 1
127
+ list.first
128
+ else
129
+ klass.new(list)
130
+ end
131
+ end
132
+
133
+ rule(head: subtree(:head), rest: sequence(:rest)) { [head].concat(rest) }
134
+ rule(sign: simple(:sign)) { Sign.new(sign.to_s) }
135
+ rule(stack: subtree(:list)) {|d| lists_as(Stack, d[:list]) }
136
+ rule(juxtaposed: subtree(:list)) {|d| lists_as(Sequence, d[:list]) }
137
+ end
138
+
139
+ def self.parse(input)
140
+ parsed = Parser.new.parse(input)
141
+ Quadrats.new(Transform.new.apply(parsed))
142
+ end
143
+ end
144
+ end