pdf-reader-find_text 0.1.1 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 19766f6210ef79a6a29a22f6c0f501c8acc2cb18d539511a8fd163962735264f
4
- data.tar.gz: 101ced07a4294793b2f7b8addb54e7df2f25a254fc5c88e0c5f6f2c13dba6453
3
+ metadata.gz: 3ca302a11748be3c14295a72be6e3e231f1bce93cf92b4eb9a0cec8ff022c1f9
4
+ data.tar.gz: 79bea9f5a62553f91106ad7d47a93d56fd602b86729a911eaf06ec88c39817dc
5
5
  SHA512:
6
- metadata.gz: 27d305f423497fbf1bdb14be0eb777250bd36e6b16f673609e3d193fd9ca317eb7e03b8669448c8987c39bfd15ccabeedc8f823fad654567da08b960d5e0a787
7
- data.tar.gz: 4ac4586a856edc0202b6ce60819de435b4786c369a1ab3e022ccf5907188184b7d2b89b19fd027a6bc8495e81ea0aa41f6761c5881531bf778ebf0710bf3d0a1
6
+ metadata.gz: fab73686e49d78ae4eaceb14cbb84926093c2c54eb088e072b6bab66794d0c5d1a5042bac82d2cf30f1254cb27d740d846685db54dc3b8d578e5ddb0b325cd3d
7
+ data.tar.gz: e89dc5f098da69fec703ccc9b6d2aae92c5ff25d2b29b6ef90bd3081e8303626aad9e05db5f535a4f2c77fd4158ac214048ba51e82d8d6d594f29e6b70f2d121
data/CHANGELOG.md CHANGED
@@ -1,5 +1,13 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.1.2] - 2022-02-24
4
+
5
+ - Handle strings with spaces in a better way
6
+
7
+ ## [0.1.1] - 2022-02-23
8
+
9
+ - Remove spaces before looking for text
10
+
3
11
  ## [0.1.0] - 2022-02-23
4
12
 
5
13
  - Initial release
@@ -3,7 +3,7 @@
3
3
  module PDF
4
4
  class Reader
5
5
  module FindText
6
- VERSION = "0.1.1"
6
+ VERSION = "1.0.0"
7
7
  end
8
8
  end
9
9
  end
@@ -5,14 +5,22 @@ require "pdf-reader"
5
5
  require_relative "find_text/version"
6
6
 
7
7
  module PDF::Reader::FindText
8
- def find_text(text)
9
- text = text.tr(' ', '')
8
+ def find_text(value)
9
+ runs(merge: false).each_cons(value.tr(' ', '').size).map do |chars|
10
+ string = merge_runs_with_max_length(chars, value.size)
11
+ string if string.text[0, value.size] == value
12
+ end.compact
13
+ end
14
+
15
+ private
10
16
 
11
- runs(merge: false).each_cons(text.size).select do |r|
12
- r.map(&:text).join == text
13
- end.map do |r|
14
- PDF::Reader::TextRun.new r.first.x, r.first.y,
15
- r.sum(&:width), r.map(&:font_size).max, r.map(&:text).join
17
+ def merge_runs_with_max_length(chars, length)
18
+ chars.inject do |string, char|
19
+ if string.mergable?(char) && string.text.size < length
20
+ string + char
21
+ else
22
+ string
23
+ end
16
24
  end
17
25
  end
18
26
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pdf-reader-find_text
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Guillaume Dott
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-02-23 00:00:00.000000000 Z
11
+ date: 2022-11-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: pdf-reader
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: 2.9.2
19
+ version: '2.9'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: 2.9.2
26
+ version: '2.9'
27
27
  description: PDF::Reader extension to find text in PDF and get the page and position
28
28
  email:
29
29
  - guillaume+github@dott.fr
@@ -61,7 +61,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
61
61
  - !ruby/object:Gem::Version
62
62
  version: '0'
63
63
  requirements: []
64
- rubygems_version: 3.3.5
64
+ rubygems_version: 3.3.25
65
65
  signing_key:
66
66
  specification_version: 4
67
67
  summary: PDF::Reader extension to find text in PDF