pxindex-builder 0.1.2 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 67934e9d607e3570a66cf15e057f400ff5ecf589
4
- data.tar.gz: fd66201c28faf4f5227ec224c81eb00ef8751a5a
2
+ SHA256:
3
+ metadata.gz: 8d16d5f32fb73bc13efabdea2aa410e492cd25a58d69de231b859add956ea537
4
+ data.tar.gz: b618cd9d89f9719ca8e27eef1941af060cef59e044acb38daf4e0d1bdc16d59e
5
5
  SHA512:
6
- metadata.gz: 40f97d7d4c6a4201f63bacbaefa3374f92887285064b152f25f8d6a1f0efac63e63d390908215984ab0cc6976e24074c0daaaaa5e4eabdc26e2033cea65a42b8
7
- data.tar.gz: 00c5a55685c184d0af6b9dcdbc6423e38d3120f96a4b7e48ac5014a7d3303ee02fba27512e62033343937d75f3a36569a9602e227c97f8a56f545f9ae57bbeac
6
+ metadata.gz: c98340121ac8b0633126ad2921a9426f6b514b61bc7a57f13da0b0e0060cb1d5b597dd7418ea39c2b837d6dd67050c72b2ce9231656c6a57beb384b66d8994e4
7
+ data.tar.gz: c9ff7177b2616fce22d6d0f2508ec8acd9c9816257c92183a2fe54fcbb1803669672dccb4d5faf2e7ef94d1a14e180f1a5106b61e7ceb05c05fa88e87a483345
checksums.yaml.gz.sig CHANGED
Binary file
@@ -2,26 +2,63 @@
2
2
 
3
3
  # file: pxindex-builder.rb
4
4
 
5
+ require 'line-tree' # *new*
5
6
  require 'wordsdotdat'
6
7
  require 'phrase_lookup'
7
8
  require 'polyrex-builder'
9
+ require 'rxfreader'
8
10
 
9
11
 
10
12
  class PxIndexBuilder
11
13
 
12
- attr_reader :to_xml, :to_h
14
+ attr_reader :to_xml, :to_h, :to_s
13
15
 
14
- def initialize(obj, ignore=[])
16
+ def initialize(obj, debug: false, ignore: [])
17
+
18
+ @debug = debug
19
+ puts 'inside initialize: ' if @debug
20
+
21
+ if obj.is_a? String then
22
+
23
+ s, _ = RXFReader.read(obj)
24
+
25
+ s =~ /^---/ ? import_phrases(YAML.load(s), s, ignore) : import_index(s)
15
26
 
16
- h = if obj.is_a? String then
17
-
18
- s, _ = RXFHelper.read(obj)
19
- YAML.load(s)
20
-
21
27
  elsif obj.is_a? Hash
22
- obj
28
+ import_phrases obj, s, ignore
23
29
  end
24
30
 
31
+ end
32
+
33
+
34
+ private
35
+
36
+ def import_index(raw_s)
37
+
38
+ # find the entries which aren't on the main index
39
+ s = raw_s.sub(/<[^>]+>\n/,'')
40
+ doc = LineTree.new(s, debug: @debug).to_doc(encapsulate: true)
41
+ a = doc.root.xpath('entry/text()')
42
+ puts 'a: ' + a.inspect if @debug
43
+ puts 'doc: ' + doc.xml if @debug
44
+ a2 = doc.root.xpath('entry//entry/text()')
45
+ puts 'a2: ' + a2.inspect if @debug
46
+ a3 = a2.map(&:rstrip) - a.map(&:rstrip)
47
+ puts 'a3:' + a3.inspect if @debug
48
+
49
+ # add the new entries to the main index
50
+ s << "\n" + a3.join("\n")
51
+
52
+ s.prepend '<?ph schema="entries/section[heading]/entry[title, url]"?>
53
+
54
+ '
55
+
56
+ @to_s = s
57
+
58
+ end
59
+
60
+ def import_phrases(h, s, ignore=[])
61
+
25
62
  words = h.keys.join(' ').split(/ +/).map {|x| x[/\w+/]}.uniq\
26
63
  #.tap {|x| puts 't: ' + x.inspect}
27
64
  .reject {|x| x.length < 3}\
@@ -29,11 +66,11 @@ class PxIndexBuilder
29
66
  .reject {|x| x.length < 4 and !WordsDotDat.list.include? x.downcase}\
30
67
  .group_by(&:chr).sort
31
68
 
32
- pl = PhraseLookup.new s
69
+ pl = PhraseLookup.new s
33
70
 
34
71
  index = words.map do |letter, list|
35
72
 
36
- a = list.map do |w|
73
+ a = list.map do |w|
37
74
  phrases = pl.q(w)
38
75
  [w, phrases, phrases.map {|x| h[x] }.max]
39
76
  end
@@ -43,19 +80,20 @@ class PxIndexBuilder
43
80
  end
44
81
 
45
82
  @to_h = h = scan(index)
83
+ puts 'h: ' + h.inspect if @debug
46
84
  @to_xml = PolyrexBuilder.new(h, parents: %i(entry)).to_xml
47
85
 
48
86
 
49
87
  end
50
88
 
51
- private
89
+
52
90
 
53
91
  def scan(rows)
54
92
 
55
93
  rows.map do |x|
56
94
 
57
95
  head, body, _ = x
58
-
96
+
59
97
  a = [{title: head}]
60
98
  a << scan(body) if body and body.any?
61
99
  a
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pxindex-builder
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Robertson
@@ -10,29 +10,53 @@ bindir: bin
10
10
  cert_chain:
11
11
  - |
12
12
  -----BEGIN CERTIFICATE-----
13
- MIIDljCCAn6gAwIBAgIBATANBgkqhkiG9w0BAQUFADBIMRIwEAYDVQQDDAlnZW1t
14
- YXN0ZXIxHjAcBgoJkiaJk/IsZAEZFg5qYW1lc3JvYmVydHNvbjESMBAGCgmSJomT
15
- 8ixkARkWAmV1MB4XDTE3MTAyMDIwMTAxOVoXDTE4MTAyMDIwMTAxOVowSDESMBAG
16
- A1UEAwwJZ2VtbWFzdGVyMR4wHAYKCZImiZPyLGQBGRYOamFtZXNyb2JlcnRzb24x
17
- EjAQBgoJkiaJk/IsZAEZFgJldTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoC
18
- ggEBAMXjj+v8NMIRgsZUJvkGbi6J6O8u7CxQdPVKyHqeN1/hbuu/X9Mpmdf7HSSh
19
- 5jeRWa8nksDy1oCedj/zZ1ogdiIZUa6awLwFxov9HYgGikgP1Bgfd3i/NO0ehHtx
20
- XzwRyJet9u/PoPuQWok55UaYW9AmecD2x0HXohCya1vNWE5Y53hm1aQxFdIUnt/i
21
- u/qLokkMRwSZpiO+myvvufucl61/VnDkdripo82qyJhuIS64PUCskVyu2YGcPhPg
22
- edkHRvJQ/UBSjMreUmd2PR039TThwmKeWfU9zF8WXqmeNuAQXMY/gDyP/wKXZmXe
23
- kA5+ZZSHs+vy3pkPGUuXTKc9TRECAwEAAaOBijCBhzAJBgNVHRMEAjAAMAsGA1Ud
24
- DwQEAwIEsDAdBgNVHQ4EFgQUHOvZh5XYlwPQsrzMFknx6Uj2nOEwJgYDVR0RBB8w
25
- HYEbZ2VtbWFzdGVyQGphbWVzcm9iZXJ0c29uLmV1MCYGA1UdEgQfMB2BG2dlbW1h
26
- c3RlckBqYW1lc3JvYmVydHNvbi5ldTANBgkqhkiG9w0BAQUFAAOCAQEAoakCxolW
27
- TJ/J24Zp50sbAWp/8y3DFY6TlDgGg7UEyAXpOkp9lpOlfp4XVvZz9rFyjSyKwXXV
28
- d65xW3Crazo0RTuss36SC4HRzx8FtFnyrAAiJJcQi9/z1dHOtFs2CbMeqtUbJwIP
29
- Vrik8nKqZCw3toOPaDTcI9wijaLPVE0Pqtci5yVZa5tM3nbnjWp70iumXhJhllog
30
- k2sgD5CTNK5yx0cxmMGWSLRCAkU+EnEuB4bZdE8yg3acXg2jemonuUxB0C/HCsA8
31
- rpKyBm8O20gPepUR+u4B3OOMZL1vUMzoAppGGdK/fa9iZHfP9tccqmgXIQ8c4X1f
32
- C74enOZQySX7LA==
13
+ MIIEXjCCAsagAwIBAgIBATANBgkqhkiG9w0BAQsFADAsMSowKAYDVQQDDCFnZW1t
14
+ YXN0ZXIvREM9amFtZXNyb2JlcnRzb24vREM9ZXUwHhcNMjIwMjIzMTkwNTAxWhcN
15
+ MjMwMjIzMTkwNTAxWjAsMSowKAYDVQQDDCFnZW1tYXN0ZXIvREM9amFtZXNyb2Jl
16
+ cnRzb24vREM9ZXUwggGiMA0GCSqGSIb3DQEBAQUAA4IBjwAwggGKAoIBgQDW85TI
17
+ /Z5K4vUfBYKBHGpILk6JxCmBYJJFmYPjDQwuwfrmrU5lvPUDAdp9kQycXcKVc5Uy
18
+ XhKfobl5EFqs6x+k5Rr5W5eH4XsFy5pRyExntxcoCAyjNqHiS+kfDLG59s2ZtkBd
19
+ gFD7uS/983qC3d4lVbU2BSkUfmyFSLeEwIU123QwtDeDqG8TyajVmHVCP+Z4rn3m
20
+ m/CAGaK2jVtCl+FodEZBeZzmEQ7MfI5HKGbxXBTGplLsJlCnqsT1z1czg9TS7MQb
21
+ iYleiEzGfONtEKdXx3qxP3P4kiNTUy5ZBDrT3l9cbXDrlFTjLFhbAgekouEi2Q8M
22
+ rX2anLFWnXH4PR7B+Z+2gunVQf51m9Zas+IXJhFU2Pf6ANlak2/QDQZ/3L8A3REM
23
+ yCvvPf8ZOsycdV1ND4IUvjl9psJf4hxITq9gmfpKLR3rwK5yNK8L7t0YzDRY0wSv
24
+ eoxpVnCkhPBgYyGqbVdb8vo20HpO6upTnbYqDB6MXt6mDYXC2Fc/6eZ+h20CAwEA
25
+ AaOBijCBhzAJBgNVHRMEAjAAMAsGA1UdDwQEAwIEsDAdBgNVHQ4EFgQUg0DERRht
26
+ KGlPVqCvhV483EvmfFswJgYDVR0RBB8wHYEbZ2VtbWFzdGVyQGphbWVzcm9iZXJ0
27
+ c29uLmV1MCYGA1UdEgQfMB2BG2dlbW1hc3RlckBqYW1lc3JvYmVydHNvbi5ldTAN
28
+ BgkqhkiG9w0BAQsFAAOCAYEAUnDHC51C7bosMGleHWQ6WmADz9c1rWcI7O7hc2O0
29
+ YVJb7lWr7mbcxdHUzbFe3FZD3lbwLpV/tPBhSWA8IopouMVKEA8wLvbAy7frwRJ8
30
+ EZjFVzeXmUDY2MAbgLOh1SYgiCTYpTbTgiCByFgxuMX1cua9vAyVw2cCa7prAoj7
31
+ kZBw5ZSbabGAjGrvqgvH2+18vt55PJEUsbprVB7KOHhs3chPvDDBLYbVotlizS/D
32
+ 0zfMydUnO8GqkYSx7oked2Vx98QqJB8hQgD9gBw/g8ITbCc2RcJDMBw64synIBtM
33
+ zK1YL6OKysdroRHd7PMfui82y5TazNdbs+a0mfLslnYmuD1Qyz48G0RBf9uiM7NN
34
+ kWVg9FrcOSwREdGPQo8Nm5XBrnjNQ5kMt0dkD0hYl/6c3GlVZnNnTQtTVrI2U7/0
35
+ C5ZqDgzQ3HQLwk8KbRjlBIdMHVjHfLaqNRllK8YWKSim4ZMMaBUnTZeZE8tRUiV6
36
+ 5sJpSrhbx0blESXn24F1Nx/U
33
37
  -----END CERTIFICATE-----
34
- date: 2017-12-17 00:00:00.000000000 Z
38
+ date: 2022-02-23 00:00:00.000000000 Z
35
39
  dependencies:
40
+ - !ruby/object:Gem::Dependency
41
+ name: line-tree
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - "~>"
45
+ - !ruby/object:Gem::Version
46
+ version: '0.6'
47
+ - - ">="
48
+ - !ruby/object:Gem::Version
49
+ version: 0.6.8
50
+ type: :runtime
51
+ prerelease: false
52
+ version_requirements: !ruby/object:Gem::Requirement
53
+ requirements:
54
+ - - "~>"
55
+ - !ruby/object:Gem::Version
56
+ version: '0.6'
57
+ - - ">="
58
+ - !ruby/object:Gem::Version
59
+ version: 0.6.8
36
60
  - !ruby/object:Gem::Dependency
37
61
  name: wordsdotdat
38
62
  requirement: !ruby/object:Gem::Requirement
@@ -62,7 +86,7 @@ dependencies:
62
86
  version: '0.1'
63
87
  - - ">="
64
88
  - !ruby/object:Gem::Version
65
- version: 0.1.5
89
+ version: 0.1.7
66
90
  type: :runtime
67
91
  prerelease: false
68
92
  version_requirements: !ruby/object:Gem::Requirement
@@ -72,29 +96,29 @@ dependencies:
72
96
  version: '0.1'
73
97
  - - ">="
74
98
  - !ruby/object:Gem::Version
75
- version: 0.1.5
99
+ version: 0.1.7
76
100
  - !ruby/object:Gem::Dependency
77
101
  name: polyrex-builder
78
102
  requirement: !ruby/object:Gem::Requirement
79
103
  requirements:
80
104
  - - "~>"
81
105
  - !ruby/object:Gem::Version
82
- version: '0.2'
106
+ version: '0.3'
83
107
  - - ">="
84
108
  - !ruby/object:Gem::Version
85
- version: 0.2.0
109
+ version: 0.3.0
86
110
  type: :runtime
87
111
  prerelease: false
88
112
  version_requirements: !ruby/object:Gem::Requirement
89
113
  requirements:
90
114
  - - "~>"
91
115
  - !ruby/object:Gem::Version
92
- version: '0.2'
116
+ version: '0.3'
93
117
  - - ">="
94
118
  - !ruby/object:Gem::Version
95
- version: 0.2.0
119
+ version: 0.3.0
96
120
  description:
97
- email: james@jamesrobertson.eu
121
+ email: digital.robertson@gmail.com
98
122
  executables: []
99
123
  extensions: []
100
124
  extra_rdoc_files: []
@@ -119,9 +143,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
119
143
  - !ruby/object:Gem::Version
120
144
  version: '0'
121
145
  requirements: []
122
- rubyforge_project:
123
- rubygems_version: 2.6.13
146
+ rubygems_version: 3.2.22
124
147
  signing_key:
125
148
  specification_version: 4
126
- summary: Builds a Polyrex index (pxindex) XML document from a YAML document
149
+ summary: Builds a Polyrex index (pxindex) XML document from a YAML document or an
150
+ indented list
127
151
  test_files: []
metadata.gz.sig CHANGED
Binary file