hierogloss 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 95e96759e2925f55b6fbb05fe67628680de08e7a
4
- data.tar.gz: 7ead6a34ffe4281f10ec7cebaa94a14bca713009
3
+ metadata.gz: e0ddc0d217ce2338569073c4d5da2259a595b62b
4
+ data.tar.gz: a7f2c0564b348abcc6c864796ad69873896079a4
5
5
  SHA512:
6
- metadata.gz: 2f6d2686c6ba86ce7cc215652c8f6534fdaa3543a52adb6dee4eecdd32d3fb6b9d446d4ebd408e0bb7aad5faa61043059af9ecfbdc573068ad93cae0a22f74d4
7
- data.tar.gz: 5fbf2d371331598c34df3b5786ef9ba954f5f769d43d54fed97e1dc6be8125a8b20ad4605a498ebcdd8fc5c363254820eb0c7851a97c85a75fc698ae27571844
6
+ metadata.gz: 740be2d088f038a3a2d5ab40fc898799890ccba066047d293124532a62043372c6eebe28f75f9f23c7ab3d238b498a1c40e3e018ba9cbe27ad3e82a734ef77af
7
+ data.tar.gz: 0faa6a5486c3b58c9ac54e6c65028534ffe42a9472dbd782452887d525c439dbda57f009bf450d33e445798209d79c689bcd712712fe714062490522983b324c
data/Gemfile CHANGED
@@ -2,3 +2,7 @@ source 'https://rubygems.org'
2
2
 
3
3
  # Specify your gem's dependencies in hierogloss.gemspec
4
4
  gemspec
5
+
6
+ # Temporary until Prawn supports ttfunk 1.1. It's in the current master
7
+ # branch; it's just not released yet. Enable this to run dump_metrics.rb.
8
+ gem "ttfunk", git: "https://github.com/prawnpdf/ttfunk.git", ref: "56be4cbb7c72"
data/README.md CHANGED
@@ -2,15 +2,17 @@
2
2
 
3
3
  **WORK IN PROGRESS. Future releases may change how things work.**
4
4
 
5
- Hierogloss allows you to mix glossed hieroglyphic texts with Markdown-style
6
- formatting. For example, you can write:
5
+ Hierogloss is a set of extensions for the [Kramdown][] gem for people
6
+ working with hieroglyphs. Hierogloss allows you to mix glossed
7
+ hieroglyphic texts with Markdown-style formatting. For example, you can
8
+ write:
7
9
 
8
10
  # Disjunction in Middle Egyptian
9
11
 
10
12
  This example is based on one in Allen's excellent [Middle Egyptian: An
11
13
  Introduction to the Language and Culture of Hieroglyphs][allen].
12
14
 
13
- H: 𓊃𓀀𓏤 | 𓊃𓏏𓁐 | 𓂋𓏤𓊪𓅱
15
+ H: z:A1*Z1 | 𓊃:𓏏*𓁐 | 𓂋:𓏤-𓊪:𓅱
14
16
  L: s | s.t | r-pw
15
17
  G: man | woman | whichever
16
18
  T: either [a] man or [a] woman
@@ -70,3 +72,5 @@ but pass `input: 'hierogloss'` as an argument:
70
72
  3. Commit your changes (`git commit -am 'Add some feature'`)
71
73
  4. Push to the branch (`git push origin my-new-feature`)
72
74
  5. Create new Pull Request
75
+
76
+ [kramdown]: http://kramdown.gettalong.org/
@@ -3,7 +3,7 @@
3
3
  This example is based on one in Allen's excellent [Middle Egyptian: An
4
4
  Introduction to the Language and Culture of Hieroglyphs][allen].
5
5
 
6
- H: 𓊃𓀀𓏤 | 𓊃𓏏𓁐 | 𓂋𓏤𓊪𓅱
6
+ H: z:A1*Z1 | 𓊃:𓏏*𓁐 | 𓂋:𓏤-𓊪:𓅱
7
7
  L: s | s.t | r-pw
8
8
  G: man | woman | whichever
9
9
  T: either [a] man or [a] woman
data/hierogloss.gemspec CHANGED
@@ -19,6 +19,7 @@ Gem::Specification.new do |spec|
19
19
  spec.require_paths = ["lib"]
20
20
 
21
21
  spec.add_dependency "kramdown", "~> 1.3"
22
+ spec.add_dependency "parslet", "~> 1.4"
22
23
  spec.add_development_dependency "prawn", "~> 0.14.0"
23
24
  spec.add_development_dependency "bundler", "~> 1.3"
24
25
  spec.add_development_dependency "rake"
@@ -6,34 +6,52 @@ module Hierogloss
6
6
  DATA_DIR = File.join(File.dirname(__FILE__), '..', '..', 'data')
7
7
  MDC_MAPPING_PATH = File.join(DATA_DIR, "Unicode-MdC-Mapping-v1.utf8")
8
8
 
9
- GARDINER = {}
9
+ SIGN_TO_GARDINER = {}
10
+ MDC_TO_SIGN = {}
11
+ SIGN_TO_MDC = {}
12
+
10
13
  File.open(MDC_MAPPING_PATH, "r:bom|utf-8") do |f|
11
14
  f.each_line do |l|
12
15
  l.chomp!
13
16
  sign, hex, codes, remarks = l.split(/\t/, 4)
14
17
  for code in codes.split(/ /)
15
- next unless code =~ /\A[A-Z][0-9]+\z/
16
- GARDINER[sign] = code
18
+ MDC_TO_SIGN[code] = sign
19
+ # Uniliterals.
20
+ SIGN_TO_MDC[sign] = code if code.length == 1
21
+ # Gardiner codes, and composite signs starting with Gardiner codes.
22
+ next unless code =~ /\A[A-Z][0-9]+([-:*].*)?\z/
23
+ SIGN_TO_GARDINER[sign] = code
24
+ SIGN_TO_MDC[sign] ||= code
17
25
  end
18
26
  end
19
27
  end
20
- "𓄿𓇋𓏭𓂝𓅱𓏲𓃀𓊪𓆑𓅓𓈖𓂋𓉔𓎛𓐍𓄡𓊃𓋴𓈙𓈎𓎡𓎼𓏏𓍿𓂧𓆓".each_char do |c|
21
- GARDINER.delete(c)
22
- end
23
28
 
24
- # Try to kick things into shape for hierogl.ch.
25
- def self.headword(word)
26
- hw = word
27
- hw.gsub!(/[()]/, '')
28
- hw.sub!(/=.*\z/, '')
29
- hw.sub!(/\.w?t\z/, 't')
30
- hw.sub!(/\..*\z/, '')
31
- hw
32
- end
29
+ class << self
30
+ # Try to kick things into shape for hierogl.ch.
31
+ def headword(word)
32
+ hw = word
33
+ hw.gsub!(/[()]/, '')
34
+ hw.sub!(/=.*\z/, '')
35
+ hw.sub!(/\.w?t\z/, 't')
36
+ hw.sub!(/\..*\z/, '')
37
+ hw
38
+ end
33
39
 
34
- # Given a Unicode hieroglyph, get the corresponding Gardiner sign.
35
- def self.gardiner(sign)
36
- GARDINER[sign]
40
+ # Given a Unicode hieroglyph, get the corresponding Gardiner sign.
41
+ def sign_to_gardiner(sign)
42
+ SIGN_TO_GARDINER[sign]
43
+ end
44
+
45
+ # Convert a Manuel de Codage transliteration to the corresponding Unicode
46
+ # sign.
47
+ def mdc_to_sign(mdc)
48
+ MDC_TO_SIGN[mdc]
49
+ end
50
+
51
+ # Convert a Unicode hieroglyph to a reasonable MdC representation.
52
+ def sign_to_mdc(sign)
53
+ SIGN_TO_MDC[sign]
54
+ end
37
55
  end
38
56
  end
39
57
  end
@@ -7,6 +7,7 @@ module Hierogloss
7
7
  #:nodoc:
8
8
  class Row
9
9
  attr_reader :raw_cells
10
+ alias :cells :raw_cells
10
11
 
11
12
  def initialize(row_text)
12
13
  @raw_cells = row_text.split(/\|/).map {|c| c.strip }
@@ -29,7 +30,7 @@ module Hierogloss
29
30
  def to_kramdown
30
31
  attrs = attributes
31
32
  tr = Kramdown::Element.new(:tr, nil, attrs)
32
- raw_cells.each do |c|
33
+ cells.each do |c|
33
34
  td = Kramdown::Element.new(:td)
34
35
  children = cell_to_kramdown(c)
35
36
  if children.kind_of?(Array)
@@ -59,14 +60,21 @@ module Hierogloss
59
60
 
60
61
  #:nodoc:
61
62
  class HieroglyphRow < Row
63
+ UNLINKED = {}
64
+ "𓄿𓇋𓏭𓂝𓅱𓏲𓃀𓊪𓆑𓅓𓈖𓂋𓉔𓎛𓐍𓄡𓊃𓋴𓈙𓈎𓎡𓎼𓏏𓍿𓂧𓆓".each_char {|c| UNLINKED[c] = true }
65
+
62
66
  def class_attr
63
67
  'hgls-h'
64
68
  end
65
69
 
70
+ def cells
71
+ @cells ||= raw_cells.map {|c| Hierogloss::MdC.parse(c) }
72
+ end
73
+
66
74
  def cell_to_kramdown(cell)
67
- cell.chars.map do |c|
68
- gardiner = Dictionary.gardiner(c)
69
- if !gardiner.nil?
75
+ cell.to_linear_hieroglyphs.chars.map do |c|
76
+ gardiner = Dictionary.sign_to_gardiner(c)
77
+ unless gardiner.nil? || UNLINKED[c]
70
78
  search_link("Signe:#{gardiner}", c)
71
79
  else
72
80
  Kramdown::Element.new(:text, c)
@@ -0,0 +1,144 @@
1
+ require 'parslet'
2
+
3
+ module Hierogloss
4
+ #:nodoc: Our parser for the Manuel de Codage format.
5
+ module MdC
6
+ class Block
7
+ end
8
+
9
+ class Sign < Block
10
+ attr_reader :name
11
+
12
+ def initialize(name)
13
+ @name = name
14
+ end
15
+
16
+ def to_unicode
17
+ Hierogloss::Dictionary.mdc_to_sign(name) || name
18
+ end
19
+
20
+ def to_debug
21
+ name
22
+ end
23
+
24
+ def to_linear_hieroglyphs
25
+ to_unicode
26
+ end
27
+
28
+ def to_mdc(precedence)
29
+ mdc = Hierogloss::Dictionary.sign_to_mdc(name) || name
30
+ # Wrap composite signs in parens.
31
+ return "(#{mdc})" if mdc =~ /[-:*]/
32
+ mdc
33
+ end
34
+ end
35
+
36
+ class Group < Block
37
+ attr_reader :blocks
38
+
39
+ def initialize(blocks)
40
+ @blocks = blocks
41
+ end
42
+
43
+ def to_debug
44
+ blocks.map {|b| b.to_debug }
45
+ end
46
+
47
+ def to_linear_hieroglyphs
48
+ blocks.map {|b| b.to_linear_hieroglyphs }
49
+ end
50
+
51
+ protected
52
+
53
+ # This whole precedence business may need more test cases and further work.
54
+ def maybe_parens(current, context, str)
55
+ if current < context
56
+ "(#{str})"
57
+ else
58
+ str
59
+ end
60
+ end
61
+ end
62
+
63
+ class Sequence < Group
64
+ def to_mdc(precedence)
65
+ maybe_parens(2, precedence, blocks.map {|b| b.to_mdc(2) }.join("*"))
66
+ end
67
+ end
68
+
69
+ class Stack < Group
70
+ def to_debug
71
+ [:stack].concat(super)
72
+ end
73
+
74
+ def to_mdc(precedence)
75
+ maybe_parens(1, precedence, blocks.map {|b| b.to_mdc(1) }.join(":"))
76
+ end
77
+ end
78
+
79
+ class Quadrats < Group
80
+ # Actually render to a string here.
81
+ def to_linear_hieroglyphs
82
+ super.flatten.join
83
+ end
84
+
85
+ def to_mdc
86
+ blocks.map {|b| b.to_mdc(0) }.join("-")
87
+ end
88
+ end
89
+
90
+ class Parser < Parslet::Parser
91
+ # Whitespace and equivalent delimiters.
92
+ rule(:space) { match('[-_ ]').repeat(1) }
93
+ rule(:space?) { space.maybe }
94
+
95
+ # Signs.
96
+ rule(:alpha_sign) { match('[A-Za-z0-9]').repeat(1) }
97
+ rule(:unicode_sign) { match('[\u{13000}-\u{1342F}]') }
98
+ rule(:sign) { (alpha_sign | unicode_sign).as(:sign) >> space? }
99
+
100
+ # Parenthesized blocks.
101
+ rule(:parens) { str('(') >> space? >> sequence >> str(')') >> space? }
102
+
103
+ # "Terminal" chunks in our expression grammar, which will match
104
+ # an actual, concrete symbol in the first position.
105
+ rule(:atomic) { sign | parens }
106
+
107
+ # A list of items with separators between them.
108
+ def separated(item, separator)
109
+ (item.as(:head) >> (separator >> item).repeat.as(:rest))
110
+ end
111
+
112
+ # Nested lists of signs separated by "*".
113
+ rule(:juxtaposed) { separated(atomic, str('*')).as(:juxtaposed) }
114
+
115
+ # Stacks of signs separated by ":".
116
+ rule(:stack) { separated(juxtaposed, str(':')).as(:stack) }
117
+
118
+ rule(:sequence) { stack.repeat }
119
+ root(:sequence)
120
+ end
121
+
122
+ class Transform < Parslet::Transform
123
+ # If we only have one item, we don't need to build an extra wrapper
124
+ # class; we can just pass it up.
125
+ def self.lists_as(klass, list)
126
+ if list.length == 1
127
+ list.first
128
+ else
129
+ klass.new(list)
130
+ end
131
+ end
132
+
133
+ rule(head: subtree(:head), rest: sequence(:rest)) { [head].concat(rest) }
134
+ rule(sign: simple(:sign)) { Sign.new(sign.to_s) }
135
+ rule(stack: subtree(:list)) {|d| lists_as(Stack, d[:list]) }
136
+ rule(juxtaposed: subtree(:list)) {|d| lists_as(Sequence, d[:list]) }
137
+ end
138
+
139
+ def self.parse(input)
140
+ parsed = Parser.new.parse(input)
141
+ Quadrats.new(Transform.new.apply(parsed))
142
+ end
143
+ end
144
+ end