mmapper 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +60 -0
- data/ext/extconf.rb +29 -0
- data/ext/mmap.go +127 -0
- data/lib/mmapper.rb +45 -0
- metadata +59 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 624fb225f474ed8da83938adf19dd4e1a3727c8c4c0660d316f6896f620ca5d1
|
4
|
+
data.tar.gz: 97607d8736e460e4f788efde73cd35652b5abe850a8dfcc0accfaf68ab29ede1
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: a91ef5d57628aa47c30b5b0dd4957d41373440b7691792baa93647e57b279bcb2d466ce63f24f43285b6f93e0c711fcac882e6b54e2ebf8ddd5c043a8604a3d7
|
7
|
+
data.tar.gz: a5e0e6825d701945192923dadabbe87984a04aa2a3e74031a4e5d183abfd0f43619e531e585422416fea6711e4535e562be813b657f1fc0017fd82a0808a6726
|
data/README.md
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
# Mmapper
|
2
|
+
|
3
|
+
## Overview
|
4
|
+
Mmapper is a Ruby gem that provides a high-performance interface for **memory-mapping (mmap) files** using a Go extension. This allows efficient read access to large files without loading them into memory. As a proof of concept, a binary search function has been implemented to quickly find lines that match a given prefix.
|
5
|
+
|
6
|
+
As a benchmark, the ICANN .com zone file (22.54 GB, over 400 million lines) was loaded with Mmapper; a binary search for a given string took 4.56 ms on average (n=1000). See `benchmark_mmapper.rb`.
|
7
|
+
|
8
|
+
## Features
|
9
|
+
- **Memory-maps large files** for fast, low-memory access.
|
10
|
+
- **Supports multiple mmap instances** at the same time.
|
11
|
+
- **Uses Ruby FFI** to interact with a Go extension.
|
12
|
+
- **Binary search** for finding lines that start with a given prefix.
|
13
|
+
|
14
|
+
## Installation
|
15
|
+
Install the gem with RubyGems. The Go shared library is compiled during installation, so a Go toolchain must be available on your `PATH`:
|
16
|
+
|
17
|
+
```sh
|
18
|
+
gem install mmapper
|
19
|
+
```
|
20
|
+
|
21
|
+
## Usage
|
22
|
+
|
23
|
+
### Loading a File
|
24
|
+
To mmap a file, use `Mmapper.load_file`, which returns an instance:
|
25
|
+
|
26
|
+
```ruby
|
27
|
+
require "mmapper"
|
28
|
+
|
29
|
+
mmap = Mmapper.load_file("/path/to/file.txt")
|
30
|
+
```
|
31
|
+
|
32
|
+
### Searching for a Matching Line
|
33
|
+
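The `find_matching_line` method performs a **binary search** for a line that starts with the given prefix. The file's lines must be sorted in ascending byte order for the search to work; the method returns `nil` when no line matches: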
|
34
|
+
|
35
|
+
```ruby
|
36
|
+
result = mmap.find_matching_line("example")
|
37
|
+
puts result.nil? ? "Not found" : "Found: #{result}"
|
38
|
+
```
|
39
|
+
|
40
|
+
### Working with Multiple Files
|
41
|
+
Each mmap instance is independent, allowing you to search different files simultaneously:
|
42
|
+
|
43
|
+
```ruby
|
44
|
+
m1 = Mmapper.load_file("file1.txt")
|
45
|
+
m2 = Mmapper.load_file("file2.txt")
|
46
|
+
|
47
|
+
puts m1.find_matching_line("something")
|
48
|
+
puts m2.find_matching_line("another")
|
49
|
+
```
|
50
|
+
|
51
|
+
## Running Tests
|
52
|
+
There's a small test script:
|
53
|
+
|
54
|
+
```sh
|
55
|
+
ruby test_mmapper.rb
|
56
|
+
```
|
57
|
+
|
58
|
+
## License
|
59
|
+
This project is licensed under the MIT License.
|
60
|
+
|
data/ext/extconf.rb
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'mkmf'
require 'fileutils'

# Build script run by RubyGems when the gem is installed. It compiles the Go
# sources in this directory into a C shared library that lib/mmapper.rb loads
# through FFI, then writes the Makefile RubyGems expects from an extension.

LIB_DIR = File.expand_path(File.dirname(__FILE__))

# Detect OS and set correct library name
LIB_NAME =
  case RUBY_PLATFORM
  when /darwin/ then "libmmapper.dylib"
  when /mingw|mswin/ then "mmapper.dll"
  else "libmmapper.so"
  end

LIB_PATH = File.join(LIB_DIR, LIB_NAME)

# Fail early with an actionable message. Without this check a missing Go
# toolchain only surfaces as "Failed to initialize Go module".
find_executable('go') || raise("Go toolchain not found on PATH; install Go to build mmapper")

Dir.chdir(LIB_DIR) do
  # Ensure Go module is initialized
  unless File.exist?("go.mod")
    puts "Initializing Go module..."
    # Argument-list form of Kernel#system runs the command without a shell.
    system("go", "mod", "init", "mmapper") || raise("Failed to initialize Go module")
  end

  puts "Building Go shared library for #{RUBY_PLATFORM}..."
  unless system("go", "build", "-o", LIB_NAME, "-buildmode=c-shared", ".")
    raise "Go build failed!"
  end
end

create_makefile('mmapper')
|
data/ext/mmap.go
ADDED
@@ -0,0 +1,127 @@
|
|
1
|
+
package main
|
2
|
+
|
3
|
+
/*
|
4
|
+
#include <stdlib.h>
|
5
|
+
*/
|
6
|
+
import "C"
|
7
|
+
|
8
|
+
import (
|
9
|
+
"bytes"
|
10
|
+
"os"
|
11
|
+
"sync"
|
12
|
+
"syscall"
|
13
|
+
)
|
14
|
+
|
15
|
+
// Mmapper holds the mmap data for a file
|
16
|
+
type Mmapper struct {
|
17
|
+
mmapData []byte // read-only mapping of the file, as returned by syscall.Mmap in mmapFile
|
18
|
+
fileSize int // size of the file in bytes at the time it was mapped
|
19
|
+
}
|
20
|
+
|
21
|
+
// Store instances
|
22
|
+
var (
|
23
|
+
mu sync.Mutex // guards mmappers and nextID
|
24
|
+
mmappers = make(map[int]*Mmapper) // registry: integer handle -> mapped file
|
25
|
+
nextID = 1 // next handle to hand out; handles start at 1 and only grow
|
26
|
+
)
|
27
|
+
|
28
|
+
// mmapFile maps a file into memory.
|
29
|
+
func mmapFile(filename string) (*Mmapper, error) {
|
30
|
+
file, err := os.Open(filename)
|
31
|
+
if err != nil {
|
32
|
+
return nil, err
|
33
|
+
}
|
34
|
+
defer file.Close()
|
35
|
+
|
36
|
+
fi, err := file.Stat()
|
37
|
+
if err != nil {
|
38
|
+
return nil, err
|
39
|
+
}
|
40
|
+
|
41
|
+
size := fi.Size()
|
42
|
+
if size == 0 {
|
43
|
+
return nil, os.ErrInvalid
|
44
|
+
}
|
45
|
+
|
46
|
+
data, err := syscall.Mmap(int(file.Fd()), 0, int(size), syscall.PROT_READ, syscall.MAP_SHARED)
|
47
|
+
if err != nil {
|
48
|
+
return nil, err
|
49
|
+
}
|
50
|
+
|
51
|
+
return &Mmapper{mmapData: data, fileSize: int(size)}, nil
|
52
|
+
}
|
53
|
+
|
54
|
+
// findLineStart moves backward to find the start of a line.
|
55
|
+
func findLineStart(data []byte, pos int) int {
|
56
|
+
for pos > 0 && data[pos-1] != '\n' { // stop at offset 0 or just after the previous newline
|
57
|
+
pos--
|
58
|
+
}
|
59
|
+
return pos // offset of the first byte of the line that contains the original pos
|
60
|
+
}
|
61
|
+
|
62
|
+
// readLine reads a full line starting from a given position.
|
63
|
+
func readLine(data []byte, start int, fileSize int) string {
	// Returns the bytes from start up to (not including) the next '\n', or up
	// to the end of the data when the last line has no trailing newline.
	// The result is a copy, so it stays valid independently of the mapping.

	// Never scan past the slice, even if fileSize overstates its length.
	limit := fileSize
	if limit > len(data) {
		limit = len(data)
	}
	if start >= limit {
		return ""
	}

	// bytes.IndexByte replaces the hand-written byte-by-byte scan.
	if n := bytes.IndexByte(data[start:limit], '\n'); n >= 0 {
		return string(data[start : start+n])
	}
	return string(data[start:limit])
}
|
70
|
+
|
71
|
+
// binarySearchPrefix performs a binary search for a prefix.
|
72
|
+
func binarySearchPrefix(m *Mmapper, prefix string) string {
|
73
|
+
low, high := 0, m.fileSize-1
|
74
|
+
var match string
|
75
|
+
|
76
|
+
for low <= high {
|
77
|
+
mid := (low + high) / 2
|
78
|
+
mid = findLineStart(m.mmapData, mid)
|
79
|
+
|
80
|
+
line := readLine(m.mmapData, mid, m.fileSize)
|
81
|
+
if bytes.HasPrefix([]byte(line), []byte(prefix)) {
|
82
|
+
match = line
|
83
|
+
high = mid - 1
|
84
|
+
} else if line < prefix {
|
85
|
+
low = mid + len(line) + 1
|
86
|
+
} else {
|
87
|
+
high = mid - 1
|
88
|
+
}
|
89
|
+
}
|
90
|
+
return match
|
91
|
+
}
|
92
|
+
|
93
|
+
//export CreateMmapper
|
94
|
+
func CreateMmapper(filename *C.char) C.int {
|
95
|
+
m, err := mmapFile(C.GoString(filename)) // copy the C string into Go memory, then map the file
|
96
|
+
if err != nil {
|
97
|
+
return -1 // Error case: the file could not be opened, stat'ed or mapped, or it is empty
|
98
|
+
}
|
99
|
+
|
100
|
+
mu.Lock() // mu protects both nextID and the mmappers registry
|
101
|
+
id := nextID
|
102
|
+
nextID++
|
103
|
+
mmappers[id] = m
|
104
|
+
mu.Unlock()
|
105
|
+
|
106
|
+
return C.int(id) // positive handle the caller passes back to FindMatchingLine
|
107
|
+
}
|
108
|
+
|
109
|
+
//export FindMatchingLine
|
110
|
+
func FindMatchingLine(mmapperID C.int, prefix *C.char) *C.char {
|
111
|
+
mu.Lock() // the lock is held only for the registry lookup, not for the search itself
|
112
|
+
m, exists := mmappers[int(mmapperID)]
|
113
|
+
mu.Unlock()
|
114
|
+
|
115
|
+
if !exists {
|
116
|
+
return nil // unknown handle
|
117
|
+
}
|
118
|
+
|
119
|
+
match := binarySearchPrefix(m, C.GoString(prefix))
|
120
|
+
if match == "" { // "" means no match; an empty matching line is reported the same way
|
121
|
+
return nil
|
122
|
+
}
|
123
|
+
|
124
|
+
return C.CString(match) // C.CString allocates with C malloc: the caller owns the result and must free it
|
125
|
+
}
|
126
|
+
|
127
|
+
func main() {} // intentionally empty: a main package is required to build with -buildmode=c-shared
|
data/lib/mmapper.rb
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'ffi'
|
2
|
+
|
3
|
+
# FFI bindings for the Go shared library built from ext/mmap.go.
#
# Use Mmapper.load_file to memory-map a file and Instance#find_matching_line
# to binary-search it for a line starting with a given prefix.
module Mmapper
  extend FFI::Library

  # Minimal libc binding, used only to release strings allocated by the Go
  # side with C.CString (which uses C malloc).
  # NOTE(review): on Windows this assumes FFI's LIBC is the same C runtime the
  # Go DLL allocates from - confirm before relying on it there.
  module LibC
    extend FFI::Library
    ffi_lib FFI::Library::LIBC
    attach_function :free, [:pointer], :void
  end
  private_constant :LibC

  LIB_DIR = File.expand_path("../../ext", __FILE__)

  LIB_NAME =
    case RUBY_PLATFORM
    when /darwin/ then "libmmapper.dylib"
    when /mingw|mswin/ then "mmapper.dll"
    else "libmmapper.so"
    end

  LIB_PATH = File.join(LIB_DIR, LIB_NAME)

  # Fallback: build the shared library on first load if installation did not
  # leave one behind.
  unless File.exist?(LIB_PATH)
    Dir.chdir(LIB_DIR) do
      puts "Compiling Go shared library for #{RUBY_PLATFORM}..."
      system("go build -o #{LIB_NAME} -buildmode=c-shared .") || raise("Go build failed")
    end
  end

  ffi_lib LIB_PATH

  attach_function :create_mmapper, :CreateMmapper, [:string], :int

  # Raw binding. The Go function returns a malloc'ed C string (or NULL), so it
  # is bound as :pointer rather than :string: FFI's :string return type copies
  # the bytes but never frees the buffer, leaking it on every successful call.
  attach_function :find_matching_line_ptr, :FindMatchingLine, [:int, :string], :pointer
  private_class_method :find_matching_line_ptr

  # Looks up the first line starting with +prefix+ in the file identified by
  # +mmapper_id+ (a handle returned by create_mmapper).
  #
  # @param mmapper_id [Integer] handle returned by create_mmapper
  # @param prefix [String] prefix to search for
  # @return [String, nil] the matching line, or nil when nothing matches or
  #   the handle is unknown
  def self.find_matching_line(mmapper_id, prefix)
    ptr = find_matching_line_ptr(mmapper_id, prefix)
    return nil if ptr.null?

    begin
      # Copy the bytes into a Ruby String before releasing the C buffer.
      ptr.read_string
    ensure
      LibC.free(ptr)
    end
  end

  # A single memory-mapped file. Instances are independent of each other.
  class Instance
    # @param filename [String] path of the file to map
    # @raise [RuntimeError] if the native library reports a failure
    def initialize(filename)
      @mmapper_id = Mmapper.create_mmapper(filename)
      raise "Failed to load file: #{filename}" if @mmapper_id < 0
    end

    # @param prefix [String] prefix to search for
    # @return [String, nil] the first line starting with +prefix+, or nil
    def find_matching_line(prefix)
      result = Mmapper.find_matching_line(@mmapper_id, prefix)
      return nil if result.nil? || result.empty?

      result
    end
  end

  # Memory-maps +filename+ and returns an Instance wrapping it.
  #
  # @param filename [String] path of the file to map
  # @return [Mmapper::Instance]
  def self.load_file(filename)
    Instance.new(filename)
  end
end
|
metadata
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: mmapper
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Carl Dawson
|
8
|
+
bindir: bin
|
9
|
+
cert_chain: []
|
10
|
+
date: 2025-01-31 00:00:00.000000000 Z
|
11
|
+
dependencies:
|
12
|
+
- !ruby/object:Gem::Dependency
|
13
|
+
name: ffi
|
14
|
+
requirement: !ruby/object:Gem::Requirement
|
15
|
+
requirements:
|
16
|
+
- - "~>"
|
17
|
+
- !ruby/object:Gem::Version
|
18
|
+
version: '1.15'
|
19
|
+
type: :runtime
|
20
|
+
prerelease: false
|
21
|
+
version_requirements: !ruby/object:Gem::Requirement
|
22
|
+
requirements:
|
23
|
+
- - "~>"
|
24
|
+
- !ruby/object:Gem::Version
|
25
|
+
version: '1.15'
|
26
|
+
description: Wraps a Go extension for mmap-ing files.
|
27
|
+
email:
|
28
|
+
- email@carldaws.com
|
29
|
+
executables: []
|
30
|
+
extensions:
|
31
|
+
- ext/extconf.rb
|
32
|
+
extra_rdoc_files: []
|
33
|
+
files:
|
34
|
+
- README.md
|
35
|
+
- ext/extconf.rb
|
36
|
+
- ext/mmap.go
|
37
|
+
- lib/mmapper.rb
|
38
|
+
homepage: https://github.com/carldaws/mmapper
|
39
|
+
licenses:
|
40
|
+
- MIT
|
41
|
+
metadata: {}
|
42
|
+
rdoc_options: []
|
43
|
+
require_paths:
|
44
|
+
- lib
|
45
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
46
|
+
requirements:
|
47
|
+
- - ">="
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: '0'
|
50
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
requirements: []
|
56
|
+
rubygems_version: 3.6.2
|
57
|
+
specification_version: 4
|
58
|
+
summary: Mmap-ed files using Go and FFI.
|
59
|
+
test_files: []
|