mmapper 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (6) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +60 -0
  3. data/ext/extconf.rb +29 -0
  4. data/ext/mmap.go +127 -0
  5. data/lib/mmapper.rb +45 -0
  6. metadata +59 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 624fb225f474ed8da83938adf19dd4e1a3727c8c4c0660d316f6896f620ca5d1
4
+ data.tar.gz: 97607d8736e460e4f788efde73cd35652b5abe850a8dfcc0accfaf68ab29ede1
5
+ SHA512:
6
+ metadata.gz: a91ef5d57628aa47c30b5b0dd4957d41373440b7691792baa93647e57b279bcb2d466ce63f24f43285b6f93e0c711fcac882e6b54e2ebf8ddd5c043a8604a3d7
7
+ data.tar.gz: a5e0e6825d701945192923dadabbe87984a04aa2a3e74031a4e5d183abfd0f43619e531e585422416fea6711e4535e562be813b657f1fc0017fd82a0808a6726
data/README.md ADDED
@@ -0,0 +1,60 @@
1
+ # Mmapper
2
+
3
+ ## Overview
4
+ Mmapper is a Ruby gem that provides a high-performance interface for **memory-mapping (mmap) files** using a Go extension. This allows efficient read access to large files without loading them into memory. As a proof of concept, a binary search function has been implemented to quickly find lines that match a given prefix.
5
+
6
+ For testing purposes, the ICANN .com zone file (22.54 GB, over 400 million lines) was loaded with Mmapper, and the average time to perform a binary search for a given string was 4.56 ms (n=1000); see `benchmark_mmapper.rb`.
7
+
8
+ ## Features
9
+ - **Memory-maps large files** for fast, low-memory access.
10
+ - **Supports multiple mmap instances** at the same time.
11
+ - **Uses Ruby FFI** to interact with a Go extension.
12
+ - **Binary search** for finding lines that start with a given prefix.
13
+
14
+ ## Installation
15
+ You can install the gem locally after building it:
16
+
17
+ ```sh
18
+ gem install mmapper
19
+ ```
20
+
21
+ ## Usage
22
+
23
+ ### Loading a File
24
+ To mmap a file, use `Mmapper.load_file`, which returns an instance:
25
+
26
+ ```ruby
27
+ require "mmapper"
28
+
29
+ mmap = Mmapper.load_file("/path/to/file.txt")
30
+ ```
31
+
32
+ ### Searching for a Matching Line
33
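+ The `find_matching_line` method performs a **binary search** for a line that starts with the given prefix. The file's lines must already be sorted in ascending byte order for the search to be reliable, and `nil` is returned when no line matches: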
34
+
35
+ ```ruby
36
+ result = mmap.find_matching_line("example")
37
+ puts result.nil? ? "Not found" : "Found: #{result}"
38
+ ```
39
+
40
+ ### Working with Multiple Files
41
+ Each mmap instance is independent, allowing you to search different files simultaneously:
42
+
43
+ ```ruby
44
+ m1 = Mmapper.load_file("file1.txt")
45
+ m2 = Mmapper.load_file("file2.txt")
46
+
47
+ puts m1.find_matching_line("something")
48
+ puts m2.find_matching_line("another")
49
+ ```
50
+
51
+ ## Running Tests
52
+ There's a small test script:
53
+
54
+ ```sh
55
+ ruby test_mmapper.rb
56
+ ```
57
+
58
+ ## License
59
+ This project is licensed under the MIT License.
60
+
data/ext/extconf.rb ADDED
@@ -0,0 +1,29 @@
1
+ require 'mkmf'
2
+ require 'fileutils'
3
+
4
# Directory holding this extconf.rb; the Go build below runs from here.
LIB_DIR = File.expand_path(File.dirname(__FILE__))

# Shared-library file name for the platform Ruby is running on.
LIB_NAME =
  if RUBY_PLATFORM =~ /darwin/
    "libmmapper.dylib"
  elsif RUBY_PLATFORM =~ /mingw|mswin/
    "mmapper.dll"
  else
    "libmmapper.so"
  end

LIB_PATH = File.join(LIB_DIR, LIB_NAME)

# Build the shared library inside the extension directory.
Dir.chdir(LIB_DIR) do
  # Create a Go module the first time round so that `go build` has one to use.
  if !File.exist?("go.mod")
    puts "Initializing Go module..."
    raise("Failed to initialize Go module") unless system("go mod init mmapper")
  end

  puts "Building Go shared library for #{RUBY_PLATFORM}..."
  system("go build -o #{LIB_NAME} -buildmode=c-shared .") || raise("Go build failed!")
end

# Generate the Makefile that an extconf.rb is expected to produce.
create_makefile('mmapper')
data/ext/mmap.go ADDED
@@ -0,0 +1,127 @@
1
+ package main
2
+
3
+ /*
4
+ #include <stdlib.h>
5
+ */
6
+ import "C"
7
+
8
+ import (
9
+ "bytes"
10
+ "os"
11
+ "sync"
12
+ "syscall"
13
+ )
14
+
15
// Mmapper describes one memory-mapped file: the mapped bytes together with
// the byte length the mapping was created with.
type Mmapper struct {
	mmapData []byte // contents of the file as returned by syscall.Mmap
	fileSize int    // size of the file, in bytes, when it was mapped
}
20
+
21
+ // Store instances
22
+ var (
23
+ mu sync.Mutex
24
+ mmappers = make(map[int]*Mmapper)
25
+ nextID = 1
26
+ )
27
+
28
+ // mmapFile maps a file into memory.
29
+ func mmapFile(filename string) (*Mmapper, error) {
30
+ file, err := os.Open(filename)
31
+ if err != nil {
32
+ return nil, err
33
+ }
34
+ defer file.Close()
35
+
36
+ fi, err := file.Stat()
37
+ if err != nil {
38
+ return nil, err
39
+ }
40
+
41
+ size := fi.Size()
42
+ if size == 0 {
43
+ return nil, os.ErrInvalid
44
+ }
45
+
46
+ data, err := syscall.Mmap(int(file.Fd()), 0, int(size), syscall.PROT_READ, syscall.MAP_SHARED)
47
+ if err != nil {
48
+ return nil, err
49
+ }
50
+
51
+ return &Mmapper{mmapData: data, fileSize: int(size)}, nil
52
+ }
53
+
54
// findLineStart walks pos backwards until it sits at offset 0 or immediately
// after a '\n', and returns that offset. A pos that is already zero or
// negative is returned unchanged.
func findLineStart(data []byte, pos int) int {
	for ; pos > 0; pos-- {
		if data[pos-1] == '\n' {
			break
		}
	}
	return pos
}
61
+
62
// readLine returns the bytes from start up to, but not including, the next
// '\n' as a string. If no newline occurs before fileSize, the line runs to
// fileSize.
func readLine(data []byte, start int, fileSize int) string {
	stop := start
	for ; stop < fileSize; stop++ {
		if data[stop] == '\n' {
			break
		}
	}
	return string(data[start:stop])
}
70
+
71
+ // binarySearchPrefix performs a binary search for a prefix.
72
+ func binarySearchPrefix(m *Mmapper, prefix string) string {
73
+ low, high := 0, m.fileSize-1
74
+ var match string
75
+
76
+ for low <= high {
77
+ mid := (low + high) / 2
78
+ mid = findLineStart(m.mmapData, mid)
79
+
80
+ line := readLine(m.mmapData, mid, m.fileSize)
81
+ if bytes.HasPrefix([]byte(line), []byte(prefix)) {
82
+ match = line
83
+ high = mid - 1
84
+ } else if line < prefix {
85
+ low = mid + len(line) + 1
86
+ } else {
87
+ high = mid - 1
88
+ }
89
+ }
90
+ return match
91
+ }
92
+
93
+ //export CreateMmapper
94
+ func CreateMmapper(filename *C.char) C.int {
95
+ m, err := mmapFile(C.GoString(filename))
96
+ if err != nil {
97
+ return -1 // Error case
98
+ }
99
+
100
+ mu.Lock()
101
+ id := nextID
102
+ nextID++
103
+ mmappers[id] = m
104
+ mu.Unlock()
105
+
106
+ return C.int(id)
107
+ }
108
+
109
+ //export FindMatchingLine
110
+ func FindMatchingLine(mmapperID C.int, prefix *C.char) *C.char {
111
+ mu.Lock()
112
+ m, exists := mmappers[int(mmapperID)]
113
+ mu.Unlock()
114
+
115
+ if !exists {
116
+ return nil
117
+ }
118
+
119
+ match := binarySearchPrefix(m, C.GoString(prefix))
120
+ if match == "" {
121
+ return nil
122
+ }
123
+
124
+ return C.CString(match)
125
+ }
126
+
127
// main is intentionally empty. The package is built with -buildmode=c-shared,
// which still requires package main to declare a main function.
func main() {}
data/lib/mmapper.rb ADDED
@@ -0,0 +1,45 @@
1
+ require 'ffi'
2
+
3
# Ruby front end for the Go shared library. Loading this module binds the
# library's exported functions through FFI, building the library first when
# the expected file is not present.
module Mmapper
  extend FFI::Library

  # The gem's ext directory, resolved relative to this file.
  LIB_DIR = File.expand_path("../../ext", __FILE__)

  # Shared-library file name for the platform Ruby is running on.
  LIB_NAME =
    if RUBY_PLATFORM =~ /darwin/
      "libmmapper.dylib"
    elsif RUBY_PLATFORM =~ /mingw|mswin/
      "mmapper.dll"
    else
      "libmmapper.so"
    end

  LIB_PATH = File.join(LIB_DIR, LIB_NAME)

  # Build the library on first load if it is missing.
  if !File.exist?(LIB_PATH)
    Dir.chdir(LIB_DIR) do
      puts "Compiling Go shared library for #{RUBY_PLATFORM}..."
      raise("Go build failed") unless system("go build -o #{LIB_NAME} -buildmode=c-shared .")
    end
  end

  ffi_lib LIB_PATH

  attach_function :create_mmapper, :CreateMmapper, [:string], :int
  attach_function :find_matching_line, :FindMatchingLine, [:int, :string], :string

  # One mapped file, identified by the integer id the library handed back.
  class Instance
    # Asks the library to map +filename+ and keeps the returned id.
    # Raises RuntimeError when the library returns a negative id.
    def initialize(filename)
      @mmapper_id = Mmapper.create_mmapper(filename)
      raise "Failed to load file: #{filename}" unless @mmapper_id >= 0
    end

    # Returns the line the library found for +prefix+, or nil when the
    # library returned nothing or an empty string.
    def find_matching_line(prefix)
      line = Mmapper.find_matching_line(@mmapper_id, prefix)
      (line.nil? || line.empty?) ? nil : line
    end
  end

  # Convenience constructor: maps +filename+ and returns an Instance.
  def self.load_file(filename)
    Instance.new(filename)
  end
end
metadata ADDED
@@ -0,0 +1,59 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: mmapper
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Carl Dawson
8
+ bindir: bin
9
+ cert_chain: []
10
+ date: 2025-01-31 00:00:00.000000000 Z
11
+ dependencies:
12
+ - !ruby/object:Gem::Dependency
13
+ name: ffi
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - "~>"
17
+ - !ruby/object:Gem::Version
18
+ version: '1.15'
19
+ type: :runtime
20
+ prerelease: false
21
+ version_requirements: !ruby/object:Gem::Requirement
22
+ requirements:
23
+ - - "~>"
24
+ - !ruby/object:Gem::Version
25
+ version: '1.15'
26
+ description: Wraps a Go extension for mmap-ing files.
27
+ email:
28
+ - email@carldaws.com
29
+ executables: []
30
+ extensions:
31
+ - ext/extconf.rb
32
+ extra_rdoc_files: []
33
+ files:
34
+ - README.md
35
+ - ext/extconf.rb
36
+ - ext/mmap.go
37
+ - lib/mmapper.rb
38
+ homepage: https://github.com/carldaws/mmapper
39
+ licenses:
40
+ - MIT
41
+ metadata: {}
42
+ rdoc_options: []
43
+ require_paths:
44
+ - lib
45
+ required_ruby_version: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - ">="
48
+ - !ruby/object:Gem::Version
49
+ version: '0'
50
+ required_rubygems_version: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ requirements: []
56
+ rubygems_version: 3.6.2
57
+ specification_version: 4
58
+ summary: Mmap-ed files using Go and FFI.
59
+ test_files: []