tensor 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/AGENTS.md +38 -0
- data/extconf.rb +148 -0
- data/lib/tensor.rb +6 -0
- data/ruby_matrix.c +1367 -0
- metadata +46 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: ab1431a30c46e725f4719be6d7af3f06f77720741f858448528f23528452baea
|
|
4
|
+
data.tar.gz: 2282380204a6d8b8a839365b265cb48443dd16e1e6206bff1d6d7c68af573421
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 785a6cf18705e00c2bad28b89a1b2682b139f2423f3ba683fdfef5f2bf3204b4f1a5681c47d2734f8235585bc36e20d2c80790e0abfbf0ee5f625e2384728361
|
|
7
|
+
data.tar.gz: 7cdea95dba2a3169e10d8b1d425ada3bb06113dfca07d63287d7cc54f9014a025d31adb8a445375c5f9b00c96d5ae0532d39c39d588211f5308a36caa9743596
|
data/AGENTS.md
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# Repository Guidelines
|
|
2
|
+
|
|
3
|
+
## Project Structure & Module Organization
|
|
4
|
+
|
|
5
|
+
- Core C extension lives in `ruby_matrix.c` at the repo root.
|
|
6
|
+
- Build configuration is handled by `extconf.rb`, which generates the `Makefile`.
|
|
7
|
+
- Build artifacts include the compiled extension (named `tensor_ext` by `extconf.rb`, e.g. `tensor_ext.bundle` on macOS), `ruby_matrix.o`, and other compiled outputs; treat these as generated files.
|
|
8
|
+
|
|
9
|
+
## Build, Test, and Development Commands
|
|
10
|
+
|
|
11
|
+
- `ruby extconf.rb` – generate a fresh `Makefile` for the Ruby C extension (preferred over editing `Makefile` directly).
|
|
12
|
+
- `make` – compile the extension, enabling OpenMP and OpenBLAS when available.
|
|
13
|
+
- `make clean` – remove compiled artifacts before rebuilding or changing toolchain settings.
|
|
14
|
+
- After building, validate in a host Ruby project or with a small script that exercises matrix operations and ReLU on sample data.
|
|
15
|
+
|
|
16
|
+
## Coding Style & Naming Conventions
|
|
17
|
+
|
|
18
|
+
- C code uses 4-space indentation, braces on the same line, and descriptive names (`Matrix`, `matrix_multiply`, `matrix_relu_grad`, etc.).
|
|
19
|
+
- Prefer clear, explicit error handling via `rb_raise` with actionable messages.
|
|
20
|
+
- Keep configuration changes in `extconf.rb` (OpenMP, OpenBLAS, include/library paths); do not hand-edit the generated `Makefile`.
|
|
21
|
+
- Avoid introducing one-letter variables in new code; keep naming consistent with existing functions and types.
|
|
22
|
+
|
|
23
|
+
## Testing Guidelines
|
|
24
|
+
|
|
25
|
+
- There is no standalone test suite in this repository; rely on the consuming Ruby project’s tests and targeted smoke scripts.
|
|
26
|
+
- When adding behavior, include a minimal Ruby script (e.g., under `test/` or `examples/` if created) that constructs small matrices and verifies key operations.
|
|
27
|
+
- Aim to keep new behavior covered by at least one fast, deterministic check that can run locally after `make`.
|
|
28
|
+
|
|
29
|
+
## Commit & Pull Request Guidelines
|
|
30
|
+
|
|
31
|
+
- Use concise, imperative commit messages (e.g., “Optimize float32 multiply path”, “Fix NEON detection on ARM”).
|
|
32
|
+
- Keep changes focused: separate performance tuning, API changes, and build-system adjustments into distinct commits when possible.
|
|
33
|
+
- In PR descriptions, summarize the change, note any performance impact or dependency changes (e.g., new `libomp`/OpenBLAS requirements), and describe how you validated the build.
|
|
34
|
+
|
|
35
|
+
## Security & Configuration Tips
|
|
36
|
+
|
|
37
|
+
- This extension depends on system libraries like OpenMP (`libomp`) and OpenBLAS; prefer configuration via `extconf.rb` and environment variables rather than hardcoded absolute paths.
|
|
38
|
+
- Avoid embedding machine-specific paths in committed files; keep paths generic and documented so the extension can build on similar environments.
|
data/extconf.rb
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
require 'mkmf'
|
|
2
|
+
require 'rbconfig'
|
|
3
|
+
|
|
4
|
+
# Run the rest of this script with Ruby's verbose warnings switched on.
$VERBOSE = true

# Host OS string reported by RbConfig. on_macos holds the regexp match
# position (truthy) when that string contains "darwin", and nil otherwise.
host_os = RbConfig::CONFIG['host_os']
on_macos = /darwin/ =~ host_os
|
|
9
|
+
|
|
10
|
+
# Debug output: record the pkg-config related environment in the build log.
pkg_config_search_path = ENV['PKG_CONFIG_PATH']
puts "PKG_CONFIG_PATH: #{pkg_config_search_path}"

executable_search_path = ENV['PATH']
puts "PATH: #{executable_search_path}"

# Ask the shell where pkg-config resolves to; the result is empty when
# `which` prints nothing.
pkg_config_on_path = `which pkg-config`.chomp
puts "pkg-config location: #{pkg_config_on_path}"
|
|
14
|
+
|
|
15
|
+
# Point PKG_CONFIG at Homebrew's pkg-config on macOS unless the caller has
# already chosen one. Both Homebrew prefixes are probed (Apple Silicon first,
# then Intel) and the variable is only set when the executable actually
# exists, so PKG_CONFIG never ends up aimed at a nonexistent path.
# NOTE(review): confirm that mkmf's pkg_config honors ENV['PKG_CONFIG'].
if on_macos && ENV['PKG_CONFIG'].to_s.empty?
  homebrew_pkg_config = %w[
    /opt/homebrew/bin/pkg-config
    /usr/local/bin/pkg-config
  ].find { |candidate| File.executable?(candidate) }
  ENV['PKG_CONFIG'] = homebrew_pkg_config if homebrew_pkg_config
end
puts "Using pkg-config at: #{ENV['PKG_CONFIG']}"
|
|
20
|
+
|
|
21
|
+
# Required header check: stop the build right away when ruby.h is unavailable.
puts "Checking for ruby.h..."
abort "ruby.h not found. Please ensure Ruby development headers are installed." unless have_header('ruby.h')
|
|
26
|
+
|
|
27
|
+
# CPU family flags. Each holds the regexp match position (truthy) when the
# configured host CPU string matches, and nil otherwise.
host_cpu = RbConfig::CONFIG['host_cpu']
is_arm = host_cpu =~ /arm|aarch64/
is_x86 = host_cpu =~ /x86|x64|i386/

# Treat the toolchain as Apple's clang when Ruby's configured CC matches
# /^clang/ and the platform string contains "darwin".
compiler_command = RbConfig::CONFIG['CC']
is_apple_clang = compiler_command =~ /^clang/ && RUBY_PLATFORM =~ /darwin/
|
|
33
|
+
|
|
34
|
+
# OpenMP support.
#
# Apple's clang: OpenMP comes from a separately installed libomp. The install
# prefix is taken from LIBOMP_PREFIX when that names an existing directory,
# otherwise from the Homebrew locations for Apple Silicon (/opt/homebrew) and
# Intel (/usr/local). The compile flag is passed as "-Xpreprocessor -fopenmp"
# and the runtime is linked with -lomp.
#
# Other compilers: -fopenmp is added to both the compile and link flags when
# either libomp or libgomp can be linked.
if is_apple_clang
  puts "Detected Apple's clang, using LLVM OpenMP..."
  libomp_prefix = [
    ENV['LIBOMP_PREFIX'],
    "/opt/homebrew/opt/libomp",
    "/usr/local/opt/libomp"
  ].find { |dir| dir && Dir.exist?(dir) }

  if libomp_prefix
    $INCFLAGS << " -I#{libomp_prefix}/include"
    $LDFLAGS << " -L#{libomp_prefix}/lib"
    $CFLAGS << " -Xpreprocessor -fopenmp"
    $LDFLAGS << " -lomp"
    # Report success only when the link probe really passes; the flags above
    # are left in place either way, as before.
    if have_library('omp')
      puts "LLVM OpenMP support enabled"
    else
      puts "Warning: libomp found at #{libomp_prefix} but the link check failed."
    end
  else
    puts "Warning: OpenMP not found. Some optimizations will be disabled."
    puts "To enable OpenMP: brew install libomp"
  end
else
  # Standard OpenMP support for non-Apple compilers
  if have_library('omp') || have_library('gomp')
    $CFLAGS << " -fopenmp"
    $LDFLAGS << " -fopenmp"
    puts "OpenMP support enabled"
  else
    puts "Warning: OpenMP not found. Some optimizations will be disabled."
  end
end
|
|
60
|
+
|
|
61
|
+
# Optional OpenBLAS support.
#
# On macOS the install root is OPENBLAS_DIR when it names an existing
# directory; otherwise the newest version under Homebrew's Cellar is used.
# openblas_include_path and openblas_lib_path stay nil when nothing is found
# (and on every non-macOS host).
openblas_include_path = nil
openblas_lib_path = nil

if on_macos
  # Allow overriding OpenBLAS location, otherwise try Homebrew cellars
  explicit_openblas = ENV['OPENBLAS_DIR']
  if explicit_openblas && Dir.exist?(explicit_openblas)
    openblas_root = explicit_openblas
  else
    # Order the Cellar entries by their numeric version components. A plain
    # string sort would rank "0.3.9" above "0.3.27" and pick the older one.
    openblas_versions = Dir.glob("/opt/homebrew/Cellar/openblas/*").sort_by do |path|
      File.basename(path).scan(/\d+/).map(&:to_i)
    end
    openblas_root = openblas_versions.last
  end

  if openblas_root && Dir.exist?(openblas_root)
    openblas_include_path = File.join(openblas_root, "include")
    openblas_lib_path = File.join(openblas_root, "lib")
    $INCFLAGS << " -I#{openblas_include_path}"
    $LDFLAGS << " -L#{openblas_lib_path}"
    $LIBPATH << openblas_lib_path
  else
    puts "OpenBLAS not found under /opt/homebrew/Cellar; BLAS optimizations may be disabled."
  end
end
|
|
85
|
+
|
|
86
|
+
# Check for BLAS support using pkg-config (cross-platform). When pkg-config
# has no "openblas" entry, fall back to linking -lopenblas from the manually
# discovered library directory, if there is one.
puts "Checking for BLAS support..."
blas_status =
  if pkg_config("openblas")
    "BLAS support found via pkg-config! Enabling BLAS optimizations."
  elsif openblas_lib_path && Dir.exist?(openblas_lib_path)
    $LDFLAGS << " -lopenblas"
    "pkg-config failed to find OpenBLAS. Using manual configuration from #{openblas_lib_path}."
  else
    "OpenBLAS not found. Continuing without BLAS-specific optimizations."
  end
puts blas_status
|
|
96
|
+
|
|
97
|
+
# Clean up incompatible OpenMP flags from pkg-config when using Apple's clang.
# Only a bare -fopenmp is removed. The "-Xpreprocessor -fopenmp" pair has to
# stay intact: stripping just its second half would leave -Xpreprocessor to
# consume whichever flag follows it and would drop the OpenMP compile flag.
# The lookahead keeps longer options such as -fopenmp-simd untouched.
if is_apple_clang
  $CFLAGS = $CFLAGS.to_s.gsub(/(?<!-Xpreprocessor)\s-fopenmp(?=\s|\z)/, "")
end
|
|
101
|
+
|
|
102
|
+
# Check for math library
have_library('m')

# Check for SIMD headers based on architecture.
# x86: immintrin.h enables -mavx2 and HAVE_IMMINTRIN_H.
# ARM: arm_neon.h enables HAVE_ARM_NEON_H. The -mfpu=neon switch is a 32-bit
# ARM option, so it is skipped on macOS and on 64-bit hosts (aarch64/arm64),
# where the compiler rejects it.
puts "Checking for SIMD headers..."
if is_x86 && have_header('immintrin.h')
  $CFLAGS << " -mavx2"
  $defs << "-DHAVE_IMMINTRIN_H"
  puts "AVX2 support found! Enabling AVX2 optimizations."
elsif is_arm && have_header('arm_neon.h')
  $CFLAGS << " -DHAVE_ARM_NEON_H"
  if RbConfig::CONFIG['host_os'] =~ /darwin/
    puts "ARM NEON support found on macOS! Enabling NEON optimizations."
  else
    arm_64bit = RbConfig::CONFIG['host_cpu'] =~ /aarch64|arm64/
    $CFLAGS << " -mfpu=neon" unless arm_64bit
    puts "ARM NEON support found! Enabling NEON optimizations."
  end
  $defs << "-DHAVE_ARM_NEON_H"
else
  puts "No SIMD headers found. SIMD optimizations will not be available."
end
|
|
123
|
+
|
|
124
|
+
# Add optimization flags
|
|
125
|
+
$CFLAGS << " -O3 -march=native"
|
|
126
|
+
$CFLAGS << " -fno-common -pipe"
|
|
127
|
+
$CFLAGS << " -D_XOPEN_SOURCE -D_DARWIN_C_SOURCE -D_DARWIN_UNLIMITED_SELECT -D_REENTRANT"
|
|
128
|
+
|
|
129
|
+
# Additional include and library search paths (Homebrew prefix first, then
# /usr/local), appended after everything configured earlier.
$INCFLAGS << " -I/opt/homebrew/include" << " -I/usr/local/include"
$LDFLAGS << " -L/opt/homebrew/lib" << " -L/usr/local/lib"
|
|
134
|
+
|
|
135
|
+
# Preprocessor definitions handed to the C sources.
# NOTE(review): presumably these gate the neural-network and parallel code
# paths in ruby_matrix.c; confirm against the C source.
$defs.push("-DUSE_NEURAL_OPS", "-DUSE_PARALLEL_OPS")
|
|
138
|
+
|
|
139
|
+
# Print final configuration: one line per setting, after a blank separator.
puts "\nFinal Configuration:",
     "Compiler: #{RbConfig::CONFIG['CC']}",
     "CFLAGS: #{$CFLAGS}",
     "LDFLAGS: #{$LDFLAGS}",
     "INCFLAGS: #{$INCFLAGS}",
     "Definitions: #{$defs.join(' ')}"

# Create the makefile for the native extension (tensor_ext)
create_makefile('tensor_ext')
|