epos 0.9.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE +674 -0
- data/README.md +45 -0
- data/README.pt-br.md +43 -0
- data/epico.png +0 -0
- data/epos.gemspec +20 -0
- data/examples/web.rb +9 -0
- data/lib/epos/cp/f1.rb +14 -0
- data/lib/epos/cp/f10.rb +18 -0
- data/lib/epos/cp/f11.rb +51 -0
- data/lib/epos/cp/f12.rb +19 -0
- data/lib/epos/cp/f13.rb +21 -0
- data/lib/epos/cp/f16.rb +8 -0
- data/lib/epos/cp/f2.rb +55 -0
- data/lib/epos/cp/f3.rb +19 -0
- data/lib/epos/cp/f4.rb +19 -0
- data/lib/epos/cp/f6.rb +34 -0
- data/lib/epos/cp/f7.rb +19 -0
- data/lib/epos/cp/f8.rb +29 -0
- data/lib/epos/cp/f9.rb +14 -0
- data/lib/epos/data-file.rb +28 -0
- data/lib/epos/dictionary.rb +104 -0
- data/lib/epos/encoded-file.rb +38 -0
- data/lib/epos/entry-parser.rb +157 -0
- data/lib/epos/html/attrs.slim +14 -0
- data/lib/epos/html/defin-body.slim +8 -0
- data/lib/epos/html/defins.slim +8 -0
- data/lib/epos/html/entry.slim +7 -0
- data/lib/epos/html/extra-tab.slim +3 -0
- data/lib/epos/html/headword.slim +14 -0
- data/lib/epos/html/idioms-tab.slim +8 -0
- data/lib/epos/html/senses-tab.slim +8 -0
- data/lib/epos/html/style.css +54 -0
- data/lib/epos/html/symbols-tab.slim +4 -0
- data/lib/epos/html-formatter.rb +109 -0
- data/lib/epos/index-file.rb +35 -0
- data/lib/epos/indexed-data-file.rb +25 -0
- data/lib/epos/search.rb +7 -0
- data/lib/epos/text-parser.rb +184 -0
- data/lib/epos.rb +9 -0
- metadata +97 -0
data/README.md
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
Epos
|
2
|
+
====
|
3
|
+
|
4
|
+
Epos, from the Greek ἔπος (“word”), is a Ruby gem that gives you access to the
|
5
|
+
data files of the excellent
|
6
|
+
[Houaiss dictionary](http://www.objetiva.com.br/livro_ficha.php?id=707).
|
7
|
+
|
8
|
+
Please note that you must
|
9
|
+
[buy the dictionary](http://www.objetiva.com.br/livro_ficha.php?id=707) in order
|
10
|
+
to use this gem.
|
11
|
+
|
12
|
+
Features
|
13
|
+
--------
|
14
|
+
|
15
|
+
* Entries can be retrieved as HTML.
|
16
|
+
* Both the main dictionary and the morpheme one are supported.
|
17
|
+
* There's a parser for the structure used by entries.
|
18
|
+
* There's a parser for the RTF-like format used by the text.
|
19
|
+
* Characters are rendered correctly, even those in the etymology section or in
|
20
|
+
the pronunciation of foreign words.
|
21
|
+
|
22
|
+
Example
|
23
|
+
-------
|
24
|
+
|
25
|
+
The following example looks up the word *épico* and saves it to a file:
|
26
|
+
|
27
|
+
require 'epos'
|
28
|
+
dict = Epos::Dictionary.new("/home/user/houaiss")
|
29
|
+
File.write("épico.html", dict.look_up("épico"))
|
30
|
+
|
31
|
+
This is what it should look like:
|
32
|
+
|
33
|
+
![épico](epico.png)
|
34
|
+
|
35
|
+
Limitations
|
36
|
+
-----------
|
37
|
+
|
38
|
+
* No support for animal sounds, verb conjugation, etc.
|
39
|
+
* No nice interface.
|
40
|
+
|
41
|
+
Compatibility
|
42
|
+
-------------
|
43
|
+
|
44
|
+
I know it works with version 2009.6 (July 2013). I don't know about any other
|
45
|
+
version.
|
data/README.pt-br.md
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
Epos
|
2
|
+
====
|
3
|
+
|
4
|
+
Epos, do grego ἔπος (“palavra”), é uma gem de Ruby que dá acesso aos arquivos de
|
5
|
+
dados do excelente
|
6
|
+
[dicionário Houaiss eletrônico da língua portuguesa](http://www.objetiva.com.br/livro_ficha.php?id=707).
|
7
|
+
|
8
|
+
Note que você precisa
|
9
|
+
[comprar o dicionário](http://www.objetiva.com.br/livro_ficha.php?id=707) para
|
10
|
+
usar esta gem.
|
11
|
+
|
12
|
+
Funcionalidades
|
13
|
+
---------------
|
14
|
+
|
15
|
+
* Os verbetes podem ser convertidos para HTML.
|
16
|
+
* Há um analisador para a estrutura usada nos verbetes.
|
17
|
+
* Há um analisador para o formato do texto, que é semelhante ao RTF.
|
18
|
+
* Os caracteres aparecem corretamente, mesmo aqueles na parte de etimologia e na
|
19
|
+
pronúncia de palavras estrangeiras.
|
20
|
+
|
21
|
+
Exemplo
|
22
|
+
-------
|
23
|
+
|
24
|
+
O seguinte exemplo busca a definição da palavra *épico* e a salva num arquivo:
|
25
|
+
|
26
|
+
require 'epos'
|
27
|
+
dict = Epos::Dictionary.new("/home/user/houaiss")
|
28
|
+
File.write("épico.html", dict.look_up("épico"))
|
29
|
+
|
30
|
+
Deve gerar algo assim:
|
31
|
+
|
32
|
+
![épico](epico.png)
|
33
|
+
|
34
|
+
Limitações
|
35
|
+
----------
|
36
|
+
|
37
|
+
* Não há suporte às vozes de animais, à conjugação verbal, etc.
|
38
|
+
* Não há uma interface amigável.
|
39
|
+
|
40
|
+
Compatibilidade
|
41
|
+
---------------
|
42
|
+
|
43
|
+
Sei que funciona com a versão 2009.6 (julho de 2013). Não sei sobre as outras.
|
data/epico.png
ADDED
Binary file
|
data/epos.gemspec
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
# Gem specification for epos: metadata, the list of packaged files and the
# single runtime dependency (slim).
Gem::Specification.new do |s|
  s.name        = 'epos'
  s.version     = '0.9.0'
  s.date        = '2014-09-05'
  s.summary     = "Access to the Houaiss dictionary data files"
  s.description = "Provides access to the Houaiss dictionary data files. Entries can be exported as HTML."
  s.authors     = ["Adiel Mittmann"]
  s.email       = 'adiel@mittmann.net.br'
  s.homepage    = 'https://github.com/adiel-mittmann/epos'
  # RubyGems expects an SPDX license identifier; 'GPL3' is not one and
  # triggers a warning on `gem build`.
  # NOTE(review): newer SPDX lists prefer 'GPL-3.0-only' or
  # 'GPL-3.0-or-later' -- pick whichever matches the author's intent.
  s.license     = 'GPL-3.0'

  # Static top-level files plus everything under lib/ and examples/ that the
  # gem needs at runtime (Ruby sources, Slim templates, stylesheet).
  s.files = %w(LICENSE README.md README.pt-br.md epico.png epos.gemspec) +
            Dir["lib/**/*.rb"] +
            Dir["lib/**/*.slim"] +
            Dir["lib/**/*.css"] +
            Dir["examples/**/*.rb"]

  s.add_dependency 'slim', '>= 2.0.0'
end
|
data/examples/web.rb
ADDED
data/lib/epos/cp/f1.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
module Epos
  class CodePage
    # Character replacement table for code page "f1": each key is a source
    # character, each value the Unicode string that replaces it.
    # NOTE(review): the code that consumes this table is not visible here.
    # Frozen so the shared constant cannot be modified by accident.
    F1_MAP = {
      "¬" => "-́",
      "\u0096" => "—",
      "\u0097" => "—",
      "\u0086" => "✝",
      "\u0093" => "“",
      "\u0094" => "”",
      "\u009c" => "œ", # A mistake, should be f6, e.g. "causeur".
    }.freeze
  end
end
|
data/lib/epos/cp/f10.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
module Epos
  class CodePage
    # Character replacement table for code page "f10": every plain letter is
    # replaced by the same letter carrying a dot below.
    # Frozen so the shared constant cannot be modified by accident.
    F10_MAP = {
      "t" => "ṭ",
      "h" => "ḥ",
      "s" => "ṣ",
      "H" => "Ḥ",
      "d" => "ḍ",
      "r" => "ṛ",
      "S" => "Ṣ",
      "n" => "ṇ",
      "z" => "ẓ",
      "m" => "ṃ",
      "l" => "ḷ",
    }.freeze
  end
end
|
data/lib/epos/cp/f11.rb
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
module Epos
  class CodePage
    # Character replacement table for code page "f11": Latin letters map to
    # Greek letters, and a few accented characters map to mathematical
    # symbols.
    # Frozen so the shared constant cannot be modified by accident.
    F11_MAP = {
      "®" => "→",
      "¾" => "—",
      "a" => "α",
      "b" => "β",
      "c" => "χ",
      "d" => "δ",
      "Ç" => "∩",
      "D" => "Δ",
      "e" => "ε",
      "È" => "∪",
      "f" => "φ",
      "F" => "Φ",
      "g" => "γ",
      "G" => "Γ",
      "h" => "η",
      "i" => "ι",
      "k" => "κ",
      "l" => "λ",
      "m" => "μ",
      "Ñ" => "∇",
      "o" => "ο",
      "ò" => "ʃ",
      "Ö" => "√",
      "º" => "≡",
      "p" => "π",
      "q" => "θ",
      "Q" => "Θ",
      "r" => "ρ",
      "s" => "σ",
      "S" => "Σ",
      "t" => "τ",
      "u" => "υ",
      "U" => "Υ",
      "V" => "ζ", # "dzeta". Houaiss shows "ς".
      "w" => "ω",
      "W" => "Ω",
      "x" => "ξ",
      "X" => "Ξ",
      "y" => "ψ",
      "Y" => "Ψ",
      "." => ".",
      "L" => "Λ",
      "n" => "ν",
      "P" => "Π",
    }.freeze
  end
end
|
data/lib/epos/cp/f12.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
module Epos
  class CodePage
    # Character replacement table for code page "f12": characters map to
    # musical notation symbols (accidentals, notes and clefs).
    # Frozen so the shared constant cannot be modified by accident.
    F12_MAP = {
      "¿" => "♭",
      "Æ" => "𝅘𝅥𝅲",
      "x" => "𝅘𝅥𝅯",
      "w" => "𝅝",
      "r" => "𝅘𝅥𝅰",
      "q" => "𝅘𝅥",
      "k" => "♮",
      "h" => "𝅗𝅥",
      "e" => "𝅘𝅥𝅮",
      "B" => "𝄡",
      "?" => "𝄢",
      "&" => "𝄞",
    }.freeze
  end
end
|
data/lib/epos/cp/f13.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
module Epos
  class CodePage
    # Character replacement table for code page "f13": acute-accented vowels
    # map to vowels with a macron, grave-accented vowels to vowels with a
    # breve; "ø", "u" and "i" map to themselves.
    # Frozen so the shared constant cannot be modified by accident.
    F13_MAP = {
      "ì" => "ĭ",
      "á" => "ā",
      "í" => "ī",
      "ó" => "ō",
      "è" => "ĕ",
      "ý" => "ȳ",
      "ù" => "ŭ",
      "é" => "ē",
      "ú" => "ū",
      "ò" => "ŏ",
      "à" => "ă",
      "ø" => "ø",
      "u" => "u",
      "i" => "i",
    }.freeze
  end
end
|
data/lib/epos/cp/f16.rb
ADDED
data/lib/epos/cp/f2.rb
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
module Epos
  class CodePage
    # Character replacement table for code page "f2": accented vowels map to
    # vowels with macron, breve, macron+acute or dot below; plain letters and
    # punctuation map to themselves.
    #
    # The original literal listed "i" => "i" twice. Ruby warns about duplicate
    # keys and keeps only the last one, so the redundant entry was removed.
    # NOTE(review): the identity mappings may originally have carried
    # combining marks -- confirm against the upstream file.
    # Frozen so the shared constant cannot be modified by accident.
    F2_MAP = {
      "Á" => "Ā",
      "Ó" => "Ō",
      "á" => "ā",
      "é" => "ē",
      "í" => "ī",
      "ó" => "ō",
      "ú" => "ū",
      "ý" => "ȳ",
      "à" => "ă",
      "è" => "ĕ",
      "ì" => "ĭ",
      "ò" => "ŏ",
      "ù" => "ŭ",
      "þ" => "y̆",
      "â" => "ā́",
      "ê" => "ḗ",
      "ô" => "ṓ",
      "î" => "ī́",
      "ë" => "ẹ",
      "ö" => "ọ",
      "ü" => "ụ",
      "k" => "k",
      "h" => "h",
      "d" => "d",
      "n" => "n",
      "o" => "o",
      "c" => "c",
      "e" => "e",
      "m" => "m",
      "i" => "i",
      "r" => "r",
      "l" => "l",
      "u" => "u",
      "s" => "s",
      "a" => "a",
      "t" => "t",
      "g" => "g",
      "y" => "y",
      ">" => ">",
      "'" => "'",
      "," => ",",
      "-" => "-",
      " " => " ",
      ")" => ")",
      "\u0096" => "—",
      "\u0097" => "—",
      "¬" => "-́",
    }.freeze
  end
end
|
data/lib/epos/cp/f3.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
module Epos
  class CodePage
    # Character replacement table for code page "f3": letters map to the same
    # letter with a tilde, circumflex, diaeresis or caron; "s", "-" and the
    # space map to themselves.
    # Frozen so the shared constant cannot be modified by accident.
    F3_MAP = {
      "\u0096" => "-́",
      "g" => "ĝ",
      "e" => "ẽ",
      "i" => "ĩ",
      "u" => "ũ",
      "¬" => "-",
      "-" => "-",
      "j" => "j̈",
      "s" => "s",
      "y" => "ỹ",
      " " => " ",
      "k" => "ǩ",
    }.freeze
  end
end
|
data/lib/epos/cp/f4.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
module Epos
  class CodePage
    # Character replacement table for code page "f4": capital letters map to
    # superscript (modifier) letters.
    #
    # "y" is used in two entries: "evonim-" and "mormir(o)-". In both cases, it
    # is shown as a "y" with something weird below. In the first entry, it is
    # used to represent the *short* "y" in "euonymus"; in the second, the *long*
    # "y" in "mormyr". So... since the mark carries no consistent meaning, "y"
    # is simply mapped to a plain "y".
    #
    # Frozen so the shared constant cannot be modified by accident.
    F4_MAP = {
      "H" => "ʰ",
      "N" => "ⁿ",
      "T" => "ᵗ",
      "S" => "ˢ",
      "J" => "ʲ",
      "ï" => "ï",
      "D" => "ᵈ",
      "y" => "y",
    }.freeze
  end
end
|
data/lib/epos/cp/f6.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
module Epos
  class CodePage
    # Character replacement table for code page "f6": characters map to
    # phonetic symbols, mostly IPA letters.
    # Frozen so the shared constant cannot be modified by accident.
    F6_MAP = {
      "«" => "ə",
      '"' => "ɪ",
      "E" => "ɛ",
      "S" => "ʃ",
      "\u0086" => "ɔ",
      "A" => "ɑ",
      "N" => "ŋ",
      "Z" => "ʒ",
      "Ã" => "ʌ",
      "¡" => "ɑ̃",
      "Î" => "ɜ",
      "é" => "ɛ̃",
      "U" => "ʊ",
      "\u009c" => "œ",
      "ø" => "ɲ",
      "Ö" => "ʕ",
      "ç" => "ɥ",
      "|" => "ɫ",
      "ö" => "ɨ",
      "¢" => "ɔ̃",
      "´" => "ʎ",
      "l" => "ḷ",
      "â" => "a͡a",
      "¸" => "ɸ",
      "D" => "ð",
      "\u008a" => "ș",
      "t" => "ț",
    }.freeze
  end
end
|
data/lib/epos/cp/f7.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
module Epos
  class CodePage
    # Character replacement table for code page "f7": characters map to
    # further phonetic symbols and marked letters.
    # Frozen so the shared constant cannot be modified by accident.
    F7_MAP = {
      "r" => "ɹ",
      "e" => "ẹ",
      "·" => "ẹ",
      "\u0096" => "o̧",
      "¢" => "ɔ̃",
      "m" => "ɯ",
      "¡" => "ɑ̃",
      "ø" => "ɲ",
      "«" => "ə",
      "w" => "ẅ",
      "M" => ">", # "antilambda", but see http://en.wikipedia.org/wiki/Diple_(textual_symbol)
      "A" => "ɑ",
    }.freeze
  end
end
|
data/lib/epos/cp/f8.rb
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
module Epos
  class CodePage
    # Characters in this page produce a line above some of the characters that
    # precede them. Their purpose is to indicate the pitch in Japanese words.
    # Lowercase letters indicate that the pitch remains the same, uppercase that
    # it drops. The closer a letter is to the beginning of the alphabet, the
    # shorter the line that it produces.
    #
    # The length of the line does not match well letter boundaries. It's
    # difficult to establish how many letters should be affected.
    #
    # In this table the lowercase markers map to the empty string and every
    # uppercase marker maps to the same single symbol.
    # Frozen so the shared constant cannot be modified by accident.
    F8_MAP = {
      "c" => "",
      "d" => "",
      "e" => "",
      "f" => "",
      "h" => "",
      "j" => "",

      "A" => "˺",
      "C" => "˺",
      "D" => "˺",
      "E" => "˺",
      "F" => "˺",
      "G" => "˺",
      "H" => "˺",
    }.freeze
  end
end
|
data/lib/epos/cp/f9.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'epos/encoded-file.rb'
|
2
|
+
|
3
|
+
module Epos

  # Reads individual entries out of an encoded data file. Entries are
  # delimited by a "*" at the start of a line.
  class DataFile

    # Number of characters requested from the underlying file per read.
    CHUNK_SIZE = 1024

    # path - location of the encoded data file, handed to EncodedFile.
    def initialize(path)
      @file = EncodedFile.new(path)
    end

    # Returns the entry that begins at +pos+ as a String.
    #
    # Reading starts one position after +pos+ and a literal "*" is prepended
    # to the result (presumably +pos+ is the offset of the entry's own leading
    # "*" -- confirm against the index reader). The entry ends with the
    # newline that precedes the next "\n*" sequence, or at end of file when no
    # further entry follows.
    def read_entry(pos)
      @file.seek(pos + 1)
      entry = "*".dup

      until @file.eof?
        chunk = @file.read(CHUNK_SIZE)
        break if chunk.nil? || chunk.empty?

        # Only the new data needs scanning. Start one character early so a
        # "\n*" split across two chunks is still found; everything before
        # that was already searched on previous iterations.
        search_from = [entry.length - 1, 0].max
        entry << chunk

        boundary = entry.index("\n*", search_from)
        # Keep the newline that terminates this entry, drop the next "*".
        return entry[0..boundary] if boundary
      end

      entry
    end

  end
end
|
@@ -0,0 +1,104 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
require 'epos/indexed-data-file.rb'
|
3
|
+
require 'epos/html-formatter.rb'
|
4
|
+
require 'epos/entry-parser.rb'
|
5
|
+
|
6
|
+
module Epos
  # Looks words up in two indexed data files and renders the matching entries
  # as HTML, each file with its own formatter.
  class Dictionary

    # Opens both indexed data files under +path+ and builds the fuzzy index
    # used by #look_up.
    #
    # path - directory containing deah001.dhn, deah002.dhn, deah007.dhn and
    #        deah008.dhn.
    def initialize(path)
      @idf1 = Epos::IndexedDataFile.new(File.join(path, "deah002.dhn"), File.join(path, "deah001.dhn"))
      @idf2 = Epos::IndexedDataFile.new(File.join(path, "deah008.dhn"), File.join(path, "deah007.dhn"))

      @fmt1 = Epos::HtmlFormatter.new(unmarked: false)
      @fmt2 = Epos::HtmlFormatter.new(unmarked: true)

      @entry_parser = Epos::EntryParser.new

      # simplified key => sorted, duplicate-free list of the actual keys
      @fuzzy = (@idf1.keys + @idf2.keys).group_by { |key| simplify(key) }
      @fuzzy.each_value do |keys|
        keys.sort!
        keys.uniq!
      end
    end

    # Looks +word+ up and returns the formatted entries as an HTML String.
    #
    # word     - the word to search for.
    # level    - how permissive the search is:
    #            0 = exact key only;
    #            1 = also every key whose simplified form equals that of +word+;
    #            2 = also every key whose simplified form contains it.
    # fragment - when true, only the concatenated entry markup is returned
    #            (possibly empty); when false, the markup (or a "no results"
    #            notice) is wrapped in a complete HTML document that embeds
    #            the stylesheet.
    def look_up(word, level: 1, fragment: false)
      simple_word = simplify(word)

      # Candidates in priority order; `uniq` keeps the first occurrence, so
      # every key is rendered once, at its earliest position.
      candidates = []
      candidates << word if level >= 0
      candidates.concat(@fuzzy[simple_word] || []) if level >= 1
      if level >= 2
        @fuzzy.each do |simple, words|
          candidates.concat(words) if simple.include?(simple_word)
        end
      end

      html = candidates.uniq.map { |actual| look_up_and_format(actual) }.join
      return html if fragment

      html = "<i>No results were found for your query.</i>" if html.empty?

      %Q{
        <!DOCTYPE html>
        <html>
          <head>
            <meta charset='utf-8'>
            <title>Epos</title>
            <style type='text/css'>#{self.style}</style>
          </head>
          <body>
            #{html}
          </body>
        </html>
      }
    end

    # Returns the stylesheet, as provided by the first formatter.
    def style
      @fmt1.style
    end

    # Returns the HTML for every entry stored under exactly +word+ in either
    # data file (first file's entries first); "" when there is none.
    def look_up_and_format(word)
      first  = @idf1.look_up(word).map { |text| @fmt1.format(@entry_parser.parse(text)) }
      second = @idf2.look_up(word).map { |text| @fmt2.format(@entry_parser.parse(text)) }
      (first + second).join
    end

    # Reduces +s+ to the form used as key of the fuzzy index: the listed
    # accented letters are replaced by unaccented lowercase letters, and
    # spaces, hyphens and parentheses are removed.
    #
    # The hyphen is escaped inside the character class. The previous pattern,
    # /[ -\\(\\)]/, left it unescaped, which turned " -\" into a range from
    # space to backslash and stripped digits, ASCII capitals and most
    # punctuation as well.
    def simplify(s)
      s.tr("áéíóúâêôãõàüçÁÉÍÓÚÂÊÔÃÕÀÜÇ", "aeiouaeoaoaucaeiouaeoaoauc").gsub(/[ ()\-]/, "")
    end

  end
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module Epos
  # Read-only access to a file whose bytes are enciphered: every byte is
  # deciphered on the fly and the result is returned as UTF-8 text.
  class EncodedFile

    # Opens +path+ in binary mode. The caller is responsible for calling
    # #close.
    def initialize(path)
      @io = File.open(path, "rb")
    end

    # Reads up to +length+ bytes (everything that remains when +length+ is
    # nil) and returns them deciphered, as a UTF-8 String.
    #
    # Returns nil when the underlying IO#read returns nil, i.e. when a
    # +length+ was given and the file is already at its end. Previously this
    # case raised NoMethodError.
    def read(length = nil)
      raw = @io.read(length)
      return nil if raw.nil?

      # Deciphered bytes are interpreted as ISO-8859-1 and transcoded.
      decipher(raw).encode("utf-8", "iso-8859-1")
    end

    # Moves the read position to byte offset +pos+.
    def seek(pos)
      @io.seek(pos)
    end

    # Closes the underlying file.
    def close
      @io.close
    end

    # Returns true when the read position is at the end of the file.
    def eof?
      @io.eof?
    end

    # Opens +path+, reads +length+ bytes (or the whole file) through #read
    # and returns the result. The file is closed even if reading raises.
    def self.read(path, length = nil)
      file = EncodedFile.new(path)
      begin
        file.read(length)
      ensure
        file.close
      end
    end

    protected

    # Deciphers a binary String: 0x0b is added to every byte modulo 255, and
    # a result of 11 is replaced by 10 (line feed).
    # NOTE(review): the modulus is 255, not 256; kept exactly as in the
    # original -- confirm against real data files before changing it.
    def decipher(data)
      plain = data.each_byte.map do |byte|
        value = (byte + 0x0b) % 255
        value == 11 ? 10 : value
      end
      plain.pack("C*")
    end

  end
end
|