ractor_queue 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,127 @@
1
+ /* -*- mode: c++; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 4 -*- */
2
+ #ifndef ATOMIC_QUEUE_DEFS_H_INCLUDED
3
+ #define ATOMIC_QUEUE_DEFS_H_INCLUDED
4
+
5
+ // Copyright (c) 2019 Maxim Egorushkin. MIT License. See the full licence in file LICENSE.
6
+
7
+ #include <atomic>
8
+
9
+ ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
10
+
11
+ // Define a CPU-specific spin_loop_pause function.
12
+ // "static inline" documentation: https://gcc.gnu.org/onlinedocs/gcc/Inline.html
13
+
14
+ #if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86)
15
+ #include <emmintrin.h>
16
+ #endif
17
+
18
namespace atomic_queue {

// Per-architecture definitions of:
//  * CACHE_LINE_SIZE    - assumed L1 data cache line size, used for alignment/padding
//                         to avoid false sharing between producer and consumer indexes.
//  * spin_loop_pause()  - a CPU hint that the caller is in a spin-wait loop, reducing
//                         power use and pipeline contention with the sibling hyper-thread.

#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86)
constexpr int CACHE_LINE_SIZE = 64;
// PAUSE instruction via the SSE2 intrinsic from <emmintrin.h> (included above).
static inline void spin_loop_pause() noexcept {
    _mm_pause();
}

#elif defined(__arm__) || defined(__aarch64__) || defined(_M_ARM64)
/*
TODO:

"asm volatile" is adequate and sufficient to prevent the asm statements from being reordered by a compiler relative to any and all preceding and following statements.

The "memory" clobber in an asm statement invalidates all memory and all registers loaded from memory prior to the asm statement, forcing compilers to emit otherwise unnecessary machine code to reload registers from memory. "memory" clobbers is one of the worst performance killers -- a key motivation for inventing std::memory_order. std::memory_order is the extreme opposite of asm "memory" clobbers enabling fine-grained control of non-atomic instruction reordering relative to atomic instructions, obviating the need to ever use the most detrimental and undesirable asm "memory" clobbers.

The asm "memory" clobbers defeat and undo any and all positive effects of the precise weakest and cheapest possible std::memory_order arguments this library calls std::atomic member functions with. The benchmarks built for ARM are unlikely to perform anywhere near/similar to the x86_64 benchmark levels of performance with the asm "memory" clobber.

The effects of asm "memory" clobber have only recently become intuitively familiar to me and I don't have access to a multi-core ARM workstation to benchmark the performance boost of removing the asm "memory" clobber. Which I expect to be significant, based on my experience of relaxing std::memory_orders on x86_64 platform. Hence, benchmarking on multi-core ARM hardware is required to validate/justify removing the "memory" clobber.
*/

// NOTE(review): some AArch64 parts (e.g. Apple M-series) report 128-byte lines;
// 64 is the common conservative value used here -- confirm for target hardware.
constexpr int CACHE_LINE_SIZE = 64;
static inline void spin_loop_pause() noexcept {
    // YIELD exists on ARMv6K and later; fall back to NOP on older cores.
#if (defined(__ARM_ARCH_6K__) || \
     defined(__ARM_ARCH_6Z__) || \
     defined(__ARM_ARCH_6ZK__) || \
     defined(__ARM_ARCH_6T2__) || \
     defined(__ARM_ARCH_7__) || \
     defined(__ARM_ARCH_7A__) || \
     defined(__ARM_ARCH_7R__) || \
     defined(__ARM_ARCH_7M__) || \
     defined(__ARM_ARCH_7S__) || \
     defined(__ARM_ARCH_8A__) || \
     defined(__aarch64__))
    asm volatile ("yield" ::: "memory");
#elif defined(_M_ARM64)
    // MSVC ARM64 compiler intrinsic emitting YIELD.
    // NOTE(review): __yield() normally requires <intrin.h> on MSVC -- confirm
    // this header compiles standalone under MSVC/ARM64.
    __yield();
#else
    asm volatile ("nop" ::: "memory");
#endif
}

#elif defined(__ppc64__) || defined(__powerpc64__)
constexpr int CACHE_LINE_SIZE = 128;
static inline void spin_loop_pause() noexcept {
    // "or 31,31,31" is the PowerPC encoding for the "very low priority" SMT hint.
    asm volatile("or 31,31,31 # very low priority");
}

#elif defined(__s390x__)
constexpr int CACHE_LINE_SIZE = 256;
static inline void spin_loop_pause() noexcept {} // TODO: Find the right instruction to use here, if any.

#elif defined(__riscv)
constexpr int CACHE_LINE_SIZE = 64;
static inline void spin_loop_pause() noexcept {
    // Raw encoding of the PAUSE hint (Zihintpause extension), emitted via .insn
    // so it assembles even when the toolchain lacks the "pause" mnemonic.
    asm volatile (".insn i 0x0F, 0, x0, x0, 0x010");
}

#elif defined(__loongarch__)
constexpr int CACHE_LINE_SIZE = 64;
static inline void spin_loop_pause() noexcept {
    // No dedicated pause hint used here; a short run of NOPs serves as the delay.
    asm volatile("nop \n nop \n nop \n nop \n nop \n nop \n nop \n nop");
}

#else
#ifdef _MSC_VER
#pragma message("Unknown CPU architecture. Using L1 cache line size of 64 bytes and no spinloop pause instruction.")
#else
#warning "Unknown CPU architecture. Using L1 cache line size of 64 bytes and no spinloop pause instruction."
#endif

constexpr int CACHE_LINE_SIZE = 64; // TODO: Review that this is the correct value.
static inline void spin_loop_pause() noexcept {}
#endif

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

// Short aliases for the std::memory_order values used throughout the library:
// A = acquire, R = release, X = relaxed, C = seq_cst, AR = acq_rel.
auto constexpr A = std::memory_order_acquire;
auto constexpr R = std::memory_order_release;
auto constexpr X = std::memory_order_relaxed;
auto constexpr C = std::memory_order_seq_cst;
auto constexpr AR = std::memory_order_acq_rel;

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

} // namespace atomic_queue
104
+
105
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
106
+
107
+ #if defined(__GNUC__) || defined(__clang__)
108
+ #define ATOMIC_QUEUE_LIKELY(expr) __builtin_expect(static_cast<bool>(expr), 1)
109
+ #define ATOMIC_QUEUE_UNLIKELY(expr) __builtin_expect(static_cast<bool>(expr), 0)
110
+ #define ATOMIC_QUEUE_NOINLINE __attribute__((noinline))
111
+ #define ATOMIC_QUEUE_INLINE inline __attribute__((always_inline))
112
+ #define ATOMIC_QUEUE_RESTRICT __restrict__
113
+ #else
114
+ #define ATOMIC_QUEUE_LIKELY(expr) (expr)
115
+ #define ATOMIC_QUEUE_UNLIKELY(expr) (expr)
116
+ #define ATOMIC_QUEUE_NOINLINE
117
+ #define ATOMIC_QUEUE_INLINE inline
118
+ #ifdef _MSC_VER
119
+ #define ATOMIC_QUEUE_RESTRICT __restrict
120
+ #else
121
+ #define ATOMIC_QUEUE_RESTRICT
122
+ #endif
123
+ #endif
124
+
125
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
126
+
127
+ #endif // ATOMIC_QUEUE_DEFS_H_INCLUDED
metadata ADDED
@@ -0,0 +1,110 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ractor_queue
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Dewayne VanHoozer
8
+ bindir: bin
9
+ cert_chain: []
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
+ dependencies:
12
+ - !ruby/object:Gem::Dependency
13
+ name: rice
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - "~>"
17
+ - !ruby/object:Gem::Version
18
+ version: '4.0'
19
+ type: :runtime
20
+ prerelease: false
21
+ version_requirements: !ruby/object:Gem::Requirement
22
+ requirements:
23
+ - - "~>"
24
+ - !ruby/object:Gem::Version
25
+ version: '4.0'
26
+ - !ruby/object:Gem::Dependency
27
+ name: rake-compiler
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - "~>"
31
+ - !ruby/object:Gem::Version
32
+ version: '1.2'
33
+ type: :development
34
+ prerelease: false
35
+ version_requirements: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: '1.2'
40
+ - !ruby/object:Gem::Dependency
41
+ name: rake
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - "~>"
45
+ - !ruby/object:Gem::Version
46
+ version: '13.0'
47
+ type: :development
48
+ prerelease: false
49
+ version_requirements: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - "~>"
52
+ - !ruby/object:Gem::Version
53
+ version: '13.0'
54
+ - !ruby/object:Gem::Dependency
55
+ name: minitest
56
+ requirement: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - "~>"
59
+ - !ruby/object:Gem::Version
60
+ version: '5.0'
61
+ type: :development
62
+ prerelease: false
63
+ version_requirements: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - "~>"
66
+ - !ruby/object:Gem::Version
67
+ version: '5.0'
68
+ description: A lock-free MPMC queue that can be shared across Ruby Ractors — the only
69
+ Ractor-safe bounded queue option since Ruby's built-in Queue uses Mutex and cannot
70
+ cross Ractor boundaries.
71
+ email:
72
+ - dvanhoozer@gmail.com
73
+ executables: []
74
+ extensions:
75
+ - ext/ractor_queue/extconf.rb
76
+ extra_rdoc_files: []
77
+ files:
78
+ - README.md
79
+ - ext/ractor_queue/extconf.rb
80
+ - ext/ractor_queue/ractor_queue.cpp
81
+ - ext/ractor_queue/standard_queue.h
82
+ - lib/ractor_queue.rb
83
+ - lib/ractor_queue/errors.rb
84
+ - lib/ractor_queue/interface.rb
85
+ - lib/ractor_queue/ractor_queue.rb
86
+ - lib/ractor_queue/version.rb
87
+ - vendor/atomic_queue/include/atomic_queue/atomic_queue.h
88
+ - vendor/atomic_queue/include/atomic_queue/defs.h
89
+ homepage: https://github.com/MadBomber/ractor_queue
90
+ licenses:
91
+ - MIT
92
+ metadata: {}
93
+ rdoc_options: []
94
+ require_paths:
95
+ - lib
96
+ required_ruby_version: !ruby/object:Gem::Requirement
97
+ requirements:
98
+ - - ">="
99
+ - !ruby/object:Gem::Version
100
+ version: 3.2.0
101
+ required_rubygems_version: !ruby/object:Gem::Requirement
102
+ requirements:
103
+ - - ">="
104
+ - !ruby/object:Gem::Version
105
+ version: '0'
106
+ requirements: []
107
+ rubygems_version: 4.0.10
108
+ specification_version: 4
109
+ summary: Ractor-shareable bounded queue for Ruby parallel workloads
110
+ test_files: []