dxtitle_search 0.2.2 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ce1fbac8b03c6ed8f54bd015c10f8d0cff189489daef845e2ba7334d51c2f43a
4
- data.tar.gz: 1097753eb43800ed8a1361e605fd4e51b427aef0fffcd67649cec7b04a0c12b7
3
+ metadata.gz: 74ea6c681b08bebcc9fa822a185562a19a262544abf12e674a150428470c8396
4
+ data.tar.gz: 1ea0ffc0e631086452ef97e51b5e154f21169d4f4274f5538a4ce7c4e476b67b
5
5
  SHA512:
6
- metadata.gz: ef562657ad220e26e087f672a178475fb15c4e41874f7eacf270c1872e0461763a2b145b509541323467c48ad02e74c91b04d0c14cd15e515a22dd08de90644c
7
- data.tar.gz: 812c6c680a264edd2c0ae95fb63b86fb66485642ddb7e2d41696ac1ff51463e6e73cabd4c7fc29cfc8c9fcd9b6e49bb0374f1d1a2f6676e7bfc903afd515dedc
6
+ metadata.gz: 4e4e998c4df64f0e18ac5556ad197d13ecec2804c9abc7f4ded1a2c7a0a1e89f5c73a73da7ffd8ed418ee5f2eea23233b675eb79412cf68e09c3bb0e333d2def
7
+ data.tar.gz: 45fc402f35aa22ff4bc66ecf752f8bb946acfeea84106513b382ba921b41c5f68882eb5b415146e13f40be5b28bdb238dc49f68c3ecdd41e7aab5662af00df87
Binary file
data.tar.gz.sig CHANGED
Binary file
@@ -2,102 +2,54 @@
2
2
 
3
3
  # file: dxtitle_search.rb
4
4
 
5
- require 'dynarex'
5
+ require 'indexer101'
6
6
 
7
7
 
8
8
  class DxTitleSearch
9
9
 
10
- def initialize(obj=nil, sources: obj, debug: false)
10
+ def initialize(obj=nil, sources: nil, debug: false)
11
11
 
12
12
  @debug = debug
13
+ @indexer = Indexer101.new debug: debug
13
14
 
14
15
  s = if sources then
15
16
 
16
- dx = Dynarex.new(sources)
17
- dx.all.map {|x| read x.uri }.join
17
+ dx = Dynarex.new(sources)
18
+ @indexer.scan_dxindex dx.all.map(&:uri), level: 1
18
19
 
19
- elsif obj then
20
-
21
- # is it a Dynarex file location?
22
- if obj.lines.length < 2 then
20
+ elsif obj and obj.lines.length < 2
23
21
 
24
- read obj
25
-
26
- else
27
-
28
- obj
22
+ @indexer.scan_dxindex obj, level: 1
29
23
 
30
- end
31
24
  end
32
25
 
33
- @h = h = s.lines.inject({}) do |r,x|
34
- key, value = x.split(/\s+(?=[^\s]+$)/,2)
35
- r.merge(key.rstrip => value)
36
- end
37
-
38
- @a = h.keys
26
+ @indexer.build
39
27
 
40
28
  end
41
29
 
42
- def search(keywords)
43
-
44
- phrases = @a.grep /#{keywords}/i
45
-
46
- # find out the keywords count for each entry found
47
- a0 = keywords.split.flat_map do |x|
48
- next if @a.length < 2
49
- @a.grep /#{x}/i
50
- end
51
-
52
- a = a0.uniq.map do |entry|
53
- [entry, entry.scan(/#{keywords.split.join('|')}/).uniq.count]
54
- end
55
-
56
- # sort by keywords found per entry and then date
57
- #a2 = (phrases + a).uniq.sort do |x, x2|
58
- a2 = a.sort do |x, x2|
59
- -([x.last, x.first[/^\d+/], ] <=> [x2.last, x2.first[/^\d+/]])
60
- end
30
+ def search(keywords)
61
31
 
32
+ a2 = @indexer.search keywords.split(/[\s:"!\?\(\)£]+(?=[\w#_'-]+)/)
62
33
  # format each result as a Hash object
63
- a3 = (phrases + a2).map do |x|
34
+ a3 = a2.map do |date, title, url|
64
35
 
65
- if x.length > 1 then
66
- line, _ = x
67
- else
68
- line = x
69
- end
70
-
71
- puts 'line: ' + line.inspect if @debug
72
-
73
- rawtime, title = line.split(/ +/,2)
74
- puts 'title: ' + title.inspect if @debug
75
-
76
- {title: title, url: @h[line].chomp, date: Time.at(rawtime.to_i)}
36
+ {title: title, url: url, date: date}
77
37
 
78
38
  end
79
39
 
80
40
  puts 'a3: ' + a3.inspect if @debug
41
+
42
+ def a3.to_dx()
43
+ Dynarex.new('results/result(title, url, date)').import(self)
44
+ end
81
45
 
82
46
  return a3
83
47
 
84
48
  end
85
49
 
86
- def tag_search(keywords)
87
- a = @a.flat_map {|x| x.split(/#/,2).last.split(/\s*#/)}
88
- a.grep(/^#{keywords}/i).map(&:downcase).uniq
89
- end
90
-
91
- private
92
-
93
- def read(source)
94
-
95
- dx = Dynarex.new(source)
96
-
97
- dx.all.map do |x|
98
- "%d %s %s" % [Time.parse(x.created).to_i, x.title, x.url]
99
- end.join("\n")
100
-
101
- end
50
+ def tag_search(keywords)
51
+ r = @indexer.lookup *keywords.split(/[\W]+(?=[\w]+)/).map {|x| "#" + x}
52
+ r.map {|x| x.to_s[1..-1]}
53
+ end
102
54
 
103
55
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dxtitle_search
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Robertson
@@ -35,28 +35,28 @@ cert_chain:
35
35
  ODgOmcgAsTin0T9l0vWQJnj2OClh1pcC6tFnQXXcYuwTpbtM2v4OmUfUDvJmwQ42
36
36
  POydZUYZg49X9TlTDqPMKed0
37
37
  -----END CERTIFICATE-----
38
- date: 2019-08-11 00:00:00.000000000 Z
38
+ date: 2019-11-12 00:00:00.000000000 Z
39
39
  dependencies:
40
40
  - !ruby/object:Gem::Dependency
41
- name: dynarex
41
+ name: indexer101
42
42
  requirement: !ruby/object:Gem::Requirement
43
43
  requirements:
44
44
  - - "~>"
45
45
  - !ruby/object:Gem::Version
46
- version: '1.8'
46
+ version: '0.2'
47
47
  - - ">="
48
48
  - !ruby/object:Gem::Version
49
- version: 1.8.19
49
+ version: 0.2.1
50
50
  type: :runtime
51
51
  prerelease: false
52
52
  version_requirements: !ruby/object:Gem::Requirement
53
53
  requirements:
54
54
  - - "~>"
55
55
  - !ruby/object:Gem::Version
56
- version: '1.8'
56
+ version: '0.2'
57
57
  - - ">="
58
58
  - !ruby/object:Gem::Version
59
- version: 1.8.19
59
+ version: 0.2.1
60
60
  description:
61
61
  email: james@jamesrobertson.eu
62
62
  executables: []
@@ -83,8 +83,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
83
83
  - !ruby/object:Gem::Version
84
84
  version: '0'
85
85
  requirements: []
86
- rubygems_version: 3.0.1
86
+ rubygems_version: 3.0.3
87
87
  signing_key:
88
88
  specification_version: 4
89
- summary: Search title entries from plain text (derived from a Dynarex document).
89
+ summary: Originally intended to search title entries from plain text (derived from
90
+ a Dynarex document).
90
91
  test_files: []
metadata.gz.sig CHANGED
Binary file