pxindex-builder 0.1.2 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- checksums.yaml.gz.sig +0 -0
- data/lib/pxindex-builder.rb +50 -12
- data.tar.gz.sig +0 -0
- metadata +56 -32
- metadata.gz.sig +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 8d16d5f32fb73bc13efabdea2aa410e492cd25a58d69de231b859add956ea537
|
4
|
+
data.tar.gz: b618cd9d89f9719ca8e27eef1941af060cef59e044acb38daf4e0d1bdc16d59e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c98340121ac8b0633126ad2921a9426f6b514b61bc7a57f13da0b0e0060cb1d5b597dd7418ea39c2b837d6dd67050c72b2ce9231656c6a57beb384b66d8994e4
|
7
|
+
data.tar.gz: c9ff7177b2616fce22d6d0f2508ec8acd9c9816257c92183a2fe54fcbb1803669672dccb4d5faf2e7ef94d1a14e180f1a5106b61e7ceb05c05fa88e87a483345
|
checksums.yaml.gz.sig
CHANGED
Binary file
|
data/lib/pxindex-builder.rb
CHANGED
@@ -2,26 +2,63 @@
|
|
2
2
|
|
3
3
|
# file: pxindex-builder.rb
|
4
4
|
|
5
|
+
require 'line-tree' # *new*
|
5
6
|
require 'wordsdotdat'
|
6
7
|
require 'phrase_lookup'
|
7
8
|
require 'polyrex-builder'
|
9
|
+
require 'rxfreader'
|
8
10
|
|
9
11
|
|
10
12
|
class PxIndexBuilder
|
11
13
|
|
12
|
-
attr_reader :to_xml, :to_h
|
14
|
+
attr_reader :to_xml, :to_h, :to_s
|
13
15
|
|
14
|
-
def initialize(obj, ignore
|
16
|
+
def initialize(obj, debug: false, ignore: [])
|
17
|
+
|
18
|
+
@debug = debug
|
19
|
+
puts 'inside initialize: ' if @debug
|
20
|
+
|
21
|
+
if obj.is_a? String then
|
22
|
+
|
23
|
+
s, _ = RXFReader.read(obj)
|
24
|
+
|
25
|
+
s =~ /^---/ ? import_phrases(YAML.load(s), s, ignore) : import_index(s)
|
15
26
|
|
16
|
-
h = if obj.is_a? String then
|
17
|
-
|
18
|
-
s, _ = RXFHelper.read(obj)
|
19
|
-
YAML.load(s)
|
20
|
-
|
21
27
|
elsif obj.is_a? Hash
|
22
|
-
obj
|
28
|
+
import_phrases obj, s, ignore
|
23
29
|
end
|
24
30
|
|
31
|
+
end
|
32
|
+
|
33
|
+
|
34
|
+
private
|
35
|
+
|
36
|
+
def import_index(raw_s)
|
37
|
+
|
38
|
+
# find the entries which aren't on the main index
|
39
|
+
s = raw_s.sub(/<[^>]+>\n/,'')
|
40
|
+
doc = LineTree.new(s, debug: @debug).to_doc(encapsulate: true)
|
41
|
+
a = doc.root.xpath('entry/text()')
|
42
|
+
puts 'a: ' + a.inspect if @debug
|
43
|
+
puts 'doc: ' + doc.xml if @debug
|
44
|
+
a2 = doc.root.xpath('entry//entry/text()')
|
45
|
+
puts 'a2: ' + a2.inspect if @debug
|
46
|
+
a3 = a2.map(&:rstrip) - a.map(&:rstrip)
|
47
|
+
puts 'a3:' + a3.inspect if @debug
|
48
|
+
|
49
|
+
# add the new entries to the main index
|
50
|
+
s << "\n" + a3.join("\n")
|
51
|
+
|
52
|
+
s.prepend '<?ph schema="entries/section[heading]/entry[title, url]"?>
|
53
|
+
|
54
|
+
'
|
55
|
+
|
56
|
+
@to_s = s
|
57
|
+
|
58
|
+
end
|
59
|
+
|
60
|
+
def import_phrases(h, s, ignore=[])
|
61
|
+
|
25
62
|
words = h.keys.join(' ').split(/ +/).map {|x| x[/\w+/]}.uniq\
|
26
63
|
#.tap {|x| puts 't: ' + x.inspect}
|
27
64
|
.reject {|x| x.length < 3}\
|
@@ -29,11 +66,11 @@ class PxIndexBuilder
|
|
29
66
|
.reject {|x| x.length < 4 and !WordsDotDat.list.include? x.downcase}\
|
30
67
|
.group_by(&:chr).sort
|
31
68
|
|
32
|
-
pl = PhraseLookup.new s
|
69
|
+
pl = PhraseLookup.new s
|
33
70
|
|
34
71
|
index = words.map do |letter, list|
|
35
72
|
|
36
|
-
a = list.map do |w|
|
73
|
+
a = list.map do |w|
|
37
74
|
phrases = pl.q(w)
|
38
75
|
[w, phrases, phrases.map {|x| h[x] }.max]
|
39
76
|
end
|
@@ -43,19 +80,20 @@ class PxIndexBuilder
|
|
43
80
|
end
|
44
81
|
|
45
82
|
@to_h = h = scan(index)
|
83
|
+
puts 'h: ' + h.inspect if @debug
|
46
84
|
@to_xml = PolyrexBuilder.new(h, parents: %i(entry)).to_xml
|
47
85
|
|
48
86
|
|
49
87
|
end
|
50
88
|
|
51
|
-
|
89
|
+
|
52
90
|
|
53
91
|
def scan(rows)
|
54
92
|
|
55
93
|
rows.map do |x|
|
56
94
|
|
57
95
|
head, body, _ = x
|
58
|
-
|
96
|
+
|
59
97
|
a = [{title: head}]
|
60
98
|
a << scan(body) if body and body.any?
|
61
99
|
a
|
data.tar.gz.sig
CHANGED
Binary file
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pxindex-builder
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Robertson
|
@@ -10,29 +10,53 @@ bindir: bin
|
|
10
10
|
cert_chain:
|
11
11
|
- |
|
12
12
|
-----BEGIN CERTIFICATE-----
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
13
|
+
MIIEXjCCAsagAwIBAgIBATANBgkqhkiG9w0BAQsFADAsMSowKAYDVQQDDCFnZW1t
|
14
|
+
YXN0ZXIvREM9amFtZXNyb2JlcnRzb24vREM9ZXUwHhcNMjIwMjIzMTkwNTAxWhcN
|
15
|
+
MjMwMjIzMTkwNTAxWjAsMSowKAYDVQQDDCFnZW1tYXN0ZXIvREM9amFtZXNyb2Jl
|
16
|
+
cnRzb24vREM9ZXUwggGiMA0GCSqGSIb3DQEBAQUAA4IBjwAwggGKAoIBgQDW85TI
|
17
|
+
/Z5K4vUfBYKBHGpILk6JxCmBYJJFmYPjDQwuwfrmrU5lvPUDAdp9kQycXcKVc5Uy
|
18
|
+
XhKfobl5EFqs6x+k5Rr5W5eH4XsFy5pRyExntxcoCAyjNqHiS+kfDLG59s2ZtkBd
|
19
|
+
gFD7uS/983qC3d4lVbU2BSkUfmyFSLeEwIU123QwtDeDqG8TyajVmHVCP+Z4rn3m
|
20
|
+
m/CAGaK2jVtCl+FodEZBeZzmEQ7MfI5HKGbxXBTGplLsJlCnqsT1z1czg9TS7MQb
|
21
|
+
iYleiEzGfONtEKdXx3qxP3P4kiNTUy5ZBDrT3l9cbXDrlFTjLFhbAgekouEi2Q8M
|
22
|
+
rX2anLFWnXH4PR7B+Z+2gunVQf51m9Zas+IXJhFU2Pf6ANlak2/QDQZ/3L8A3REM
|
23
|
+
yCvvPf8ZOsycdV1ND4IUvjl9psJf4hxITq9gmfpKLR3rwK5yNK8L7t0YzDRY0wSv
|
24
|
+
eoxpVnCkhPBgYyGqbVdb8vo20HpO6upTnbYqDB6MXt6mDYXC2Fc/6eZ+h20CAwEA
|
25
|
+
AaOBijCBhzAJBgNVHRMEAjAAMAsGA1UdDwQEAwIEsDAdBgNVHQ4EFgQUg0DERRht
|
26
|
+
KGlPVqCvhV483EvmfFswJgYDVR0RBB8wHYEbZ2VtbWFzdGVyQGphbWVzcm9iZXJ0
|
27
|
+
c29uLmV1MCYGA1UdEgQfMB2BG2dlbW1hc3RlckBqYW1lc3JvYmVydHNvbi5ldTAN
|
28
|
+
BgkqhkiG9w0BAQsFAAOCAYEAUnDHC51C7bosMGleHWQ6WmADz9c1rWcI7O7hc2O0
|
29
|
+
YVJb7lWr7mbcxdHUzbFe3FZD3lbwLpV/tPBhSWA8IopouMVKEA8wLvbAy7frwRJ8
|
30
|
+
EZjFVzeXmUDY2MAbgLOh1SYgiCTYpTbTgiCByFgxuMX1cua9vAyVw2cCa7prAoj7
|
31
|
+
kZBw5ZSbabGAjGrvqgvH2+18vt55PJEUsbprVB7KOHhs3chPvDDBLYbVotlizS/D
|
32
|
+
0zfMydUnO8GqkYSx7oked2Vx98QqJB8hQgD9gBw/g8ITbCc2RcJDMBw64synIBtM
|
33
|
+
zK1YL6OKysdroRHd7PMfui82y5TazNdbs+a0mfLslnYmuD1Qyz48G0RBf9uiM7NN
|
34
|
+
kWVg9FrcOSwREdGPQo8Nm5XBrnjNQ5kMt0dkD0hYl/6c3GlVZnNnTQtTVrI2U7/0
|
35
|
+
C5ZqDgzQ3HQLwk8KbRjlBIdMHVjHfLaqNRllK8YWKSim4ZMMaBUnTZeZE8tRUiV6
|
36
|
+
5sJpSrhbx0blESXn24F1Nx/U
|
33
37
|
-----END CERTIFICATE-----
|
34
|
-
date:
|
38
|
+
date: 2022-02-23 00:00:00.000000000 Z
|
35
39
|
dependencies:
|
40
|
+
- !ruby/object:Gem::Dependency
|
41
|
+
name: line-tree
|
42
|
+
requirement: !ruby/object:Gem::Requirement
|
43
|
+
requirements:
|
44
|
+
- - "~>"
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: '0.6'
|
47
|
+
- - ">="
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: 0.6.8
|
50
|
+
type: :runtime
|
51
|
+
prerelease: false
|
52
|
+
version_requirements: !ruby/object:Gem::Requirement
|
53
|
+
requirements:
|
54
|
+
- - "~>"
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
version: '0.6'
|
57
|
+
- - ">="
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
version: 0.6.8
|
36
60
|
- !ruby/object:Gem::Dependency
|
37
61
|
name: wordsdotdat
|
38
62
|
requirement: !ruby/object:Gem::Requirement
|
@@ -62,7 +86,7 @@ dependencies:
|
|
62
86
|
version: '0.1'
|
63
87
|
- - ">="
|
64
88
|
- !ruby/object:Gem::Version
|
65
|
-
version: 0.1.
|
89
|
+
version: 0.1.7
|
66
90
|
type: :runtime
|
67
91
|
prerelease: false
|
68
92
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -72,29 +96,29 @@ dependencies:
|
|
72
96
|
version: '0.1'
|
73
97
|
- - ">="
|
74
98
|
- !ruby/object:Gem::Version
|
75
|
-
version: 0.1.
|
99
|
+
version: 0.1.7
|
76
100
|
- !ruby/object:Gem::Dependency
|
77
101
|
name: polyrex-builder
|
78
102
|
requirement: !ruby/object:Gem::Requirement
|
79
103
|
requirements:
|
80
104
|
- - "~>"
|
81
105
|
- !ruby/object:Gem::Version
|
82
|
-
version: '0.
|
106
|
+
version: '0.3'
|
83
107
|
- - ">="
|
84
108
|
- !ruby/object:Gem::Version
|
85
|
-
version: 0.
|
109
|
+
version: 0.3.0
|
86
110
|
type: :runtime
|
87
111
|
prerelease: false
|
88
112
|
version_requirements: !ruby/object:Gem::Requirement
|
89
113
|
requirements:
|
90
114
|
- - "~>"
|
91
115
|
- !ruby/object:Gem::Version
|
92
|
-
version: '0.
|
116
|
+
version: '0.3'
|
93
117
|
- - ">="
|
94
118
|
- !ruby/object:Gem::Version
|
95
|
-
version: 0.
|
119
|
+
version: 0.3.0
|
96
120
|
description:
|
97
|
-
email:
|
121
|
+
email: digital.robertson@gmail.com
|
98
122
|
executables: []
|
99
123
|
extensions: []
|
100
124
|
extra_rdoc_files: []
|
@@ -119,9 +143,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
119
143
|
- !ruby/object:Gem::Version
|
120
144
|
version: '0'
|
121
145
|
requirements: []
|
122
|
-
|
123
|
-
rubygems_version: 2.6.13
|
146
|
+
rubygems_version: 3.2.22
|
124
147
|
signing_key:
|
125
148
|
specification_version: 4
|
126
|
-
summary: Builds a Polyrex index (pxindex) XML document from a YAML document
|
149
|
+
summary: Builds a Polyrex index (pxindex) XML document from a YAML document or an
|
150
|
+
indented list
|
127
151
|
test_files: []
|
metadata.gz.sig
CHANGED
Binary file
|