md_edit 0.1.10 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 47045b90019ff71705e40bb300f6a14acc4c4ec0
4
- data.tar.gz: ba10aac7d1d29fa69a7b589a13d7e464c35c6cc3
3
+ metadata.gz: ec6a883f3e9cd0e24504c0892a82a94dbf5eda8b
4
+ data.tar.gz: 97a7fdb671c26f124900a21bfa6ea89f822ac16c
5
5
  SHA512:
6
- metadata.gz: 0d13ae45845fd8f982284abb6078414d4d92ce269c64b072dbdd4788a574ef6aa82207032bc82ff60c1b2096a1a71c4ebef537783ad25d66c5648a0686c18490
7
- data.tar.gz: 96be01978e1caeae144d73d9bb021bf6f062d46d8093934932ea43ce1421102002189fff4a42d91fbe56d1f3b4c506820ded6625c7347d123aec6fcbc5a08177
6
+ metadata.gz: ca6e77d81b5968c20b1da74d23c7ffd4d090051f629fdfab1958fe69edbe68798c033d01ddcc00ee8ecb93281f67d53b6985164e72f107ea8eb8abe985e87aa8
7
+ data.tar.gz: ff5af7a01cb2d3c6f620b7ffd61a3b80474714b83f6dd2280df68a5e317cf2588e7b31ac89be5e57c7ddb6f82b1f440a3a1b9fbd170ef5c8761d42ab0116faf7
checksums.yaml.gz.sig CHANGED
Binary file
data/lib/md_edit.rb CHANGED
@@ -7,9 +7,12 @@ require 'line-tree'
7
7
  require 'phrase_lookup'
8
8
 
9
9
 
10
+ IGNOREWORDS = ['or', 'the', 'of', 'a', 'if', 'to', 'and',
11
+ 'in', 'is', 'are', 'as', 'it', 'at']
12
+
10
13
  class MdEdit
11
14
 
12
- attr_reader :sections
15
+ attr_reader :sections, :phraseslookup
13
16
 
14
17
  # pass in a Markdown document or a Markdown filename
15
18
  #
@@ -101,16 +104,71 @@ class MdEdit
101
104
  heading ? a.join : a
102
105
  end
103
106
 
104
- def query(s)
105
- @pl.q s
107
+ def query(s, full_trail: false, limit: 10)
108
+
109
+ puts 'query() s: ' + s.inspect if @debug
110
+
111
+ results = []
112
+
113
+ r = @headingslookup.q s
114
+
115
+ if r and r.any? then
116
+
117
+ results = if full_trail then
118
+ r
119
+ else
120
+ r.map{|x| x.split(' > ')[1..-1].join(' > ')}.reject(&:empty?)
121
+ end
122
+
123
+ end
124
+
125
+
126
+ r2 = @phraseslookup.q s, search_tags: true
127
+
128
+ if r2 and r2.any? then
129
+
130
+ a = r2.sort_by {|x| -x.length}
131
+
132
+ # attempt to remove duplicate results from the 1 section
133
+
134
+ a2 = a.group_by {|x| x[/\[[^\]]+\]/]}
135
+
136
+ a2.each do |k,v|
137
+
138
+ s4 = v.first[/\]\s*(.*)/mi,1]
139
+
140
+ index = s4 =~ /#{s}/mi
141
+
142
+ s2 = make_snippet(s4, index, words: [2,2])
143
+
144
+ v[1..-1].each do |x|
145
+
146
+ s5 = x[/\]\s*(.*)/,1]
147
+ index2 = s5 =~ /#{s}/
148
+
149
+ if index2 then
150
+ s3 = make_snippet(s5, index2, words: [2,2])
151
+ v.delete x if s2 =~ /#{s3}/
152
+ else
153
+ v.delete x
154
+ end
155
+ end
156
+
157
+ end
158
+
159
+ end
160
+
161
+ if a2 then
162
+ phrases_found = a2.values.flatten(1)
163
+ results.concat phrases_found if phrases_found
164
+ end
165
+
166
+ results.take limit
167
+
106
168
  end
107
169
 
108
170
  alias q query
109
171
 
110
- def to_h()
111
- @h
112
- end
113
-
114
172
  def to_outline(bullets: false)
115
173
 
116
174
  a = indentor(@s.scan(/^#+ [^\n]+/).join("\n"))
@@ -125,6 +183,40 @@ class MdEdit
125
183
 
126
184
  private
127
185
 
186
+ # returns a hash object; each key contains the heading as well as a phrase
187
+ #
188
+ def build_keyword_list(s, heading)
189
+
190
+ a = s.split.uniq.flat_map do |raw_word|
191
+
192
+ i, pos = 0, []
193
+
194
+ w = raw_word[/\w{2,}/]
195
+
196
+ next if IGNOREWORDS.include? w
197
+ next unless w
198
+
199
+ (pos << (s[i..-1] =~ /#{w}/i); i += pos[-1] + 1) while s[i..-1][/#{w}/i]
200
+
201
+ pos[1..-1].inject([pos[0]]) {|r,x| r << r.last + x + 1 }
202
+
203
+ pos.map do |x|
204
+
205
+ start = x-15
206
+ start = 0 if start < 0
207
+ snippet = make_snippet(s, start)
208
+
209
+ "[%s] %s | %s %s" % [heading, snippet, w.downcase,
210
+ heading.scan(/\w+/).join(' ').downcase]
211
+ end
212
+
213
+
214
+ end
215
+
216
+ a
217
+
218
+ end
219
+
128
220
  def indentor(s)
129
221
 
130
222
  a = s.split(/(?<=\n)(?=#+)/)
@@ -148,20 +240,38 @@ class MdEdit
148
240
 
149
241
  end
150
242
 
151
- def load_sections(raw_s)
152
-
153
- # strip out any new lines gaps which are greater than 1
154
- s = raw_s #.strip.gsub(/\n\s*\n\s*\n\s*/,"\n\n")
243
+ def load_sections(s)
155
244
 
156
245
  @sections = parse s
157
246
 
158
- @h = @sections.keys.inject({}) do |r,x|
247
+ h = @sections.keys.inject({}) do |r,x|
159
248
  r.merge(x.sub(/^#+ +/,'').downcase => 5 - x.count('#'))
160
249
  end
161
-
162
- @pl = PhraseLookup.new @h
250
+
251
+ @headingslookup = PhraseLookup.new h
163
252
  @s = s
164
253
 
254
+ phrases = @sections.flat_map do |heading, raw_value|
255
+
256
+ a = raw_value.take_while {|x| x.is_a? String}
257
+
258
+ next unless a and a.join.strip.length > 0
259
+ #next if a.nil? or a.join.strip.empty?
260
+ build_keyword_list(a.join.strip, heading).compact.map do |s|
261
+ [s, 4 - heading.count('>')]
262
+ end
263
+
264
+ end
265
+
266
+ @phraseslookup = PhraseLookup.new phrases.compact.to_h
267
+
268
+ end
269
+
270
+ def make_snippet(raw_s, start, words: [2, 8])
271
+
272
+ s = raw_s.gsub(/\n/,' ')
273
+ take_words_behind(s[0..start], words: words[0]) +
274
+ take_words(s[start+1..-1], words: words[-1])
165
275
  end
166
276
 
167
277
  def parse(markdown)
@@ -206,7 +316,23 @@ class MdEdit
206
316
 
207
317
  end
208
318
 
209
- end
319
+ end
320
+
321
+
322
+ def take_words(s, words: 8)
323
+
324
+ r = s[/^(?:\S+\s+){#{words}}/m]
325
+ r ? r : s
326
+
327
+ end
328
+
329
+ def take_words_behind(s, words: 2)
330
+
331
+ r = s[/(?:\s+\S+){#{words}}$/m]
332
+ r ? r : s
333
+
334
+ end
335
+
210
336
 
211
337
 
212
338
  end
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: md_edit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.10
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Robertson
@@ -31,7 +31,7 @@ cert_chain:
31
31
  R5r1JjTBaaHOaqI14bIkwUSY5q1cO4Wl7HWly80kX2AD8o/tPHvyF9Tx3pBvaboU
32
32
  etnSPHbQwmBvFQ==
33
33
  -----END CERTIFICATE-----
34
- date: 2017-12-25 00:00:00.000000000 Z
34
+ date: 2017-12-26 00:00:00.000000000 Z
35
35
  dependencies:
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: line-tree
@@ -62,7 +62,7 @@ dependencies:
62
62
  version: '0.1'
63
63
  - - ">="
64
64
  - !ruby/object:Gem::Version
65
- version: 0.1.5
65
+ version: 0.1.7
66
66
  type: :runtime
67
67
  prerelease: false
68
68
  version_requirements: !ruby/object:Gem::Requirement
@@ -72,7 +72,7 @@ dependencies:
72
72
  version: '0.1'
73
73
  - - ">="
74
74
  - !ruby/object:Gem::Version
75
- version: 0.1.5
75
+ version: 0.1.7
76
76
  description:
77
77
  email: james@jamesrobertson.eu
78
78
  executables: []
metadata.gz.sig CHANGED
Binary file