dxtitle_search 0.2.0 → 0.3.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: febca2af1d5f671487bb3a374e0ce3a3a1c9d1bc61a365a7e264da32f2865d44
4
- data.tar.gz: 8fb8561355abb9b169b789fa9388e6c70f12c92521d4dea14594ab74e8ad69d9
3
+ metadata.gz: e227dc645595c967bf03dd213ea6846c7c9c438a26ee7db3fd059f464493f370
4
+ data.tar.gz: eccff27f864a56f00f0952d8ddd47a70eaeb2fca6eca24d961cf4c91d54f7387
5
5
  SHA512:
6
- metadata.gz: c6705c471563eede40e9dd71aa133dc3e5adf9a2f014864c44312aab6a3235bd335648b2dc9a40f6d3d1a339ea50258cfe2c31f56d61f617f67dba5be4237379
7
- data.tar.gz: 9268dc3c81722d2e42824a3e6ac175e7c12e4927254fcdb7f95825f3797b060e2c912ba62af4517841c4f076cd3379ee4be48c44e866796037fb7cb27ad2520c
6
+ metadata.gz: db771d09b581573be92de6c65b79c3f635dbcb6187e0bc1d6a00c5d4042a3f13f4838696588d65cc9856be56ed5666047221204e5910fbe38e8aec42f075ee70
7
+ data.tar.gz: f39cbbb2ca067cfb4c307787b94dab70c7c0d34255037e24e9228b32f284ebbe6361523456d078659aa11e5e3e694ef6dfae3890b18b87ce90d2a707d618a14d
checksums.yaml.gz.sig CHANGED
Binary file
data.tar.gz.sig CHANGED
Binary file
@@ -2,96 +2,95 @@
2
2
 
3
3
  # file: dxtitle_search.rb
4
4
 
5
- require 'dynarex'
5
+ require 'indexer101'
6
6
 
7
7
 
8
8
  class DxTitleSearch
9
9
 
10
- def initialize(obj=nil, sources: nil, debug: false)
10
+ def initialize(obj=nil, sources: nil, level: 1, debug: false)
11
11
 
12
12
  @debug = debug
13
+ @indexer = Indexer101.new debug: debug
14
+ @level = level
13
15
 
14
16
  s = if sources then
15
17
 
16
- dx = Dynarex.new(sources)
17
- dx.all.map {|x| read x.uri }.join
18
+ dx = Dynarex.new(sources)
19
+ puts 'before scan_dxindex' if @debug
20
+ a = dx.all.map(&:uri)
21
+ puts 'a: ' + a.inspect if @debug
22
+ @indexer.scan_dxindex a, level: level
18
23
 
19
- elsif obj then
20
-
21
- # is it a Dynarex file location?
22
- if obj.lines.length < 2 then
23
-
24
- read obj
24
+ elsif obj and (obj.is_a?(DxLite) or obj.is_a?(Dynarex)) or obj.lines.length < 2
25
25
 
26
- else
26
+ @indexer.scan_dxindex obj, level: level
27
27
 
28
- obj
29
-
30
- end
31
28
  end
32
29
 
33
- @h = h = s.lines.inject({}) do |r,x|
34
- key, value = x.split(/\s+(?=[^\s]+$)/,2)
35
- r.merge(key.rstrip => value)
36
- end
37
-
38
- @a = h.keys
30
+ #jr230620 @indexer.build
39
31
 
40
32
  end
41
33
 
42
- def search(keywords)
43
-
44
- phrases = @a.grep /#{keywords}/i
34
+ def search(keywords, minchars: 3)
45
35
 
46
- # find out the keywords count for each entry found
47
- a0 = keywords.split.flat_map do |x|
48
- next if @a.length < 2
49
- @a.grep /#{x}/i
50
- end
51
-
52
- a = a0.uniq.map do |entry|
53
- [entry, entry.scan(/#{keywords.split.join('|')}/).uniq.count]
54
- end
36
+ a2 = @indexer.search keywords.split(/[\s:"!\?\(\)£]+(?=[\w#_'-]+)/), \
37
+ minchars: minchars
38
+ # format each result as a Hash object
39
+ a3 = a2.map do |date, title, url|
55
40
 
56
- # sort by keywords found per entry and then date
57
- #a2 = (phrases + a).uniq.sort do |x, x2|
58
- a2 = a.sort do |x, x2|
59
- -([x.last, x.first[/^\d+/], ] <=> [x2.last, x2.first[/^\d+/]])
60
- end
41
+ {title: title, url: url, date: date}
61
42
 
62
- # format each result as a Hash object
63
- a3 = (phrases + a2).map do |x|
43
+ end
64
44
 
65
- if x.length > 1 then
66
- line, _ = x
67
- else
68
- line = x
69
- end
45
+ puts 'a3: ' + a3.inspect if @debug
46
+
47
+ @dx = Dynarex.new('results/result(title, url, date)').import(a3)
48
+
49
+ def a3.to_dx()
50
+ Dynarex.new('results/result(title, url, date)').import(self)
51
+ end
52
+
53
+ def a3.to_tags()
54
+ a = self.map {|x| x[:title].scan(/(?<=#)(\w+)/)}.flatten
55
+ a.uniq.sort.map {|x| [x, a.count(x)]}
56
+ end
57
+
58
+ def a3.search(keywords)
70
59
 
71
- puts 'line: ' + line.inspect if @debug
72
-
73
- rawtime, title = line.split(/ +/,2)
74
- puts 'title: ' + title.inspect if @debug
75
-
76
- {title: title, url: @h[line], date: Time.at(rawtime.to_i)}
60
+ dx = Dynarex.new('results/result(title, url, date)').import(self)
77
61
 
62
+ level = keywords[0] == '#' ? 0 : 1
63
+ dts = DxTitleSearch.new dx, level: level
64
+ dts.search keywords
65
+
78
66
  end
67
+
68
+ def a3.tag_search(keywords)
69
+
70
+ dx = Dynarex.new('results/result(title, url, date)').import(self)
79
71
 
80
- puts 'a3: ' + a3.inspect if @debug
72
+ level = keywords[0] == '#' ? 0 : 1
73
+ dts = DxTitleSearch.new dx, level: level
74
+ dts.tag_search keywords
75
+
76
+ end
81
77
 
82
78
  return a3
83
79
 
84
80
  end
85
81
 
86
- private
87
-
88
- def read(source)
89
-
90
- dx = Dynarex.new(source)
82
+ def tag_search(keywords)
83
+ r = @indexer.lookup *keywords.split(/[\W]+(?=[\w]+)/).map {|x| "#" + x}
84
+ r.map {|x| x.to_s[1..-1]}
85
+ end
91
86
 
92
- dx.all.map do |x|
93
- "%d %s %s" % [Time.parse(x.created).to_i, x.title, x.url]
94
- end.join("\n")
87
+ def to_tags()
88
+
89
+ a = @indexer.index.map do |key, value|
90
+ [key.to_s[1..-1], value.length]
91
+ end
92
+
93
+ a.sort_by(&:first)
95
94
 
96
95
  end
97
96
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dxtitle_search
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Robertson
@@ -11,54 +11,54 @@ cert_chain:
11
11
  - |
12
12
  -----BEGIN CERTIFICATE-----
13
13
  MIIEXjCCAsagAwIBAgIBATANBgkqhkiG9w0BAQsFADAsMSowKAYDVQQDDCFnZW1t
14
- YXN0ZXIvREM9amFtZXNyb2JlcnRzb24vREM9ZXUwHhcNMTkwNzIzMjE0NjM1WhcN
15
- MjAwNzIyMjE0NjM1WjAsMSowKAYDVQQDDCFnZW1tYXN0ZXIvREM9amFtZXNyb2Jl
16
- cnRzb24vREM9ZXUwggGiMA0GCSqGSIb3DQEBAQUAA4IBjwAwggGKAoIBgQC4fglD
17
- MJeEttUrAcVhZbHOvDxmej9AsrgXLSOdp7pVoUXQk1tEVjKH+1ibOhxxhx5Eiptk
18
- UGqqb/X67jXasjLr1wtY2dw/LKLAIoX5iPqE4vI0I0L5tGSF9i60f1KdgfIpgnx+
19
- fVNq32btdyoFuJBNG7yDBFjJ21ao5kyO9xpSUHnUDrrwyrdTZc9v9rG0kUEKzK/w
20
- 9obVvTWBmfIjBt3Vf5O2f88S+6boBJu6fDn0smnJEScHwTgj65yLN+ejPAanlqqt
21
- sxxxjLkdYl3LVDRUKXL09XcENdDFKwyiZu7kKJiVq7kvUcwMOBFobp2iWY9Yiam9
22
- HeRjOr8W35bZ3xScPLIVU08BYscUxoOHsMRpNPUY5I8SIYCLPJ7YvBO4ZlxJT5xQ
23
- E7M8aG/uKxwHKgHJu/3siL2cU/YtkVNS9yTxHdzD++90q3sC8uOnBLUMDTsKpV4l
24
- +jHMccyHbQXoazVQoVu7xNup2BpbiHALacarCLvu9GGJLrXkcX40ujB+TMcCAwEA
25
- AaOBijCBhzAJBgNVHRMEAjAAMAsGA1UdDwQEAwIEsDAdBgNVHQ4EFgQUTUrPUS7w
26
- ILFIUp8s90zt8ljXtpkwJgYDVR0RBB8wHYEbZ2VtbWFzdGVyQGphbWVzcm9iZXJ0
14
+ YXN0ZXIvREM9amFtZXNyb2JlcnRzb24vREM9ZXUwHhcNMjEwMjIyMDAxMTU0WhcN
15
+ MjIwMjIyMDAxMTU0WjAsMSowKAYDVQQDDCFnZW1tYXN0ZXIvREM9amFtZXNyb2Jl
16
+ cnRzb24vREM9ZXUwggGiMA0GCSqGSIb3DQEBAQUAA4IBjwAwggGKAoIBgQDxwG7T
17
+ /9FLpGJct6ptTTRa2UnYj9qeRp2KmjnMpONFP7Owl1yjh2auzyIfDChG7G2W9kZ4
18
+ eOPno39R2GJOxt5IVTxmjgBpAUl8xW3n629b2jE7al1GJZ89Jm3Fqs1rBeUaC0wB
19
+ DIX4SVwHqfzxvZTVLVZ5LPIKqxNiJe8eUIRRzzc0/XzyFTh3Sg4SH1ncm04vvxWg
20
+ piQyL6iUeUuirHjotchLINWzi2A4PgO63/YYM3tYPzbykkiBrE/iJHnPuFEjNfIj
21
+ +tVTvPB0wXdCDNf6yHr9cfy0t97eVGTbwv+UhWX9Vmz6/t4bi9X2xhmdp32y9CK6
22
+ s/I16J3oC70JvsTKOhDAM3OMQFjCpBbtyFYuJDbDbQKD/elGXTHvAmCi5skCNJzE
23
+ oq1l5aVpp3IFrZQfLEimeDDEq8ulEajlllpitq1vD+vycRSydybtAhMs/kK7DY7D
24
+ kVGOI1HDz5p+A6e8wyXoecdaQeswH3gd3nkCKfV7ohKXXywW8JWMEBgYj4ECAwEA
25
+ AaOBijCBhzAJBgNVHRMEAjAAMAsGA1UdDwQEAwIEsDAdBgNVHQ4EFgQU1qWuKrKD
26
+ A90s8y+KBolbTgtLOP8wJgYDVR0RBB8wHYEbZ2VtbWFzdGVyQGphbWVzcm9iZXJ0
27
27
  c29uLmV1MCYGA1UdEgQfMB2BG2dlbW1hc3RlckBqYW1lc3JvYmVydHNvbi5ldTAN
28
- BgkqhkiG9w0BAQsFAAOCAYEAaEl3CKusWW3/wUutwDz0w0ojYuc48LOVrmGpZ8uH
29
- fi2wJx5Jt41/qxGWCz0qo/BFfFZjNllblywzhjIw80HnoGAR5bDaKF0fTdYsh++a
30
- Vyx6BkFVM5hYXIJklBT/PLR7JW/rOrFgPNrXM+BlWlFBGIQT8KaF7SfOYrDdqHG8
31
- uLlGPgLkR0U3tLfyWwF6548QRVz2Lm53dWeT0JGs974Dj+n1b8P3N0Uv1AFpetNB
32
- UTelmW2QjiLgxJxkFkRjp/Ep0s/puw3FbtaviQDBBRr9I/bMAE5QOtKa3k1e7m70
33
- 39utqhm1XpZeJamPbeuSFl6M3RRiwWACVlz9QjWVgA5JSf1GwCsc0nsa76pq7Yfj
34
- DtQaJilztwDXwMGWAJ5qDOiGcuv3Wmec+Nnr/9xoZRA9V/9GqXlWjqSSLgFHk/r1
35
- ODgOmcgAsTin0T9l0vWQJnj2OClh1pcC6tFnQXXcYuwTpbtM2v4OmUfUDvJmwQ42
36
- POydZUYZg49X9TlTDqPMKed0
28
+ BgkqhkiG9w0BAQsFAAOCAYEAHJI3akrM/ne2v1zYcZOy0POu/LLfcBM93BuQcfiN
29
+ ok256P7VWXSjU2TaR8/vLDK0dbZo5V08WCKk08yuqOdzOII7JSVh0QHzzgFTR7yS
30
+ RG3xDZtp6j4MXzoAkLd06rIL1O21GXGc49Wkc7XHeFytbRF8XhT29e4H4s5KMoJ1
31
+ l9Ks2oMc4USzKo7loCU+1q0Sg8BQEpS2+9RNTpG7MKDdneWXhb2o6UTQ3WIDpfEt
32
+ ImShbAmoVHcdMa3HZUH0Pa+pzr/KrM0sg2fa2trlJS9oDq9pZl/cQO6Ryl76lqKg
33
+ bQ7jxCYFron1UH6iS3bJBPfFmIWcio3zT8VmeqVPhXatDDnb00JijkO/WThobMxb
34
+ aZJQ2A0sZWHF9u5v9gda8mq21CID3YMPRzQ1u5o4jjyNhcVyjW29GvfD8+8D4s46
35
+ g10O6gIBUztpsNHwbBTi9fHgtis4E5dAPAo4uzo4MCKVwi72iZQ3E92ACYb9zlxA
36
+ g6CsTY0vUjhgfh9taSRj3qxd
37
37
  -----END CERTIFICATE-----
38
- date: 2019-08-07 00:00:00.000000000 Z
38
+ date: 2021-02-22 00:00:00.000000000 Z
39
39
  dependencies:
40
40
  - !ruby/object:Gem::Dependency
41
- name: dynarex
41
+ name: indexer101
42
42
  requirement: !ruby/object:Gem::Requirement
43
43
  requirements:
44
44
  - - "~>"
45
45
  - !ruby/object:Gem::Version
46
- version: '1.8'
46
+ version: '0.2'
47
47
  - - ">="
48
48
  - !ruby/object:Gem::Version
49
- version: 1.8.19
49
+ version: 0.2.5
50
50
  type: :runtime
51
51
  prerelease: false
52
52
  version_requirements: !ruby/object:Gem::Requirement
53
53
  requirements:
54
54
  - - "~>"
55
55
  - !ruby/object:Gem::Version
56
- version: '1.8'
56
+ version: '0.2'
57
57
  - - ">="
58
58
  - !ruby/object:Gem::Version
59
- version: 1.8.19
59
+ version: 0.2.5
60
60
  description:
61
- email: james@jamesrobertson.eu
61
+ email: digital.robertson@gmail.com
62
62
  executables: []
63
63
  extensions: []
64
64
  extra_rdoc_files: []
@@ -83,8 +83,10 @@ required_rubygems_version: !ruby/object:Gem::Requirement
83
83
  - !ruby/object:Gem::Version
84
84
  version: '0'
85
85
  requirements: []
86
- rubygems_version: 3.0.1
86
+ rubyforge_project:
87
+ rubygems_version: 2.7.10
87
88
  signing_key:
88
89
  specification_version: 4
89
- summary: Search title entries from plain text (derived from a Dynarex document).
90
+ summary: Originally intended to search title entries from plain text (derived from
91
+ a Dynarex document).
90
92
  test_files: []
metadata.gz.sig CHANGED
Binary file