iceberg 0.10.0 → 0.10.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/Cargo.lock +6494 -0
- data/Cargo.toml +6 -0
- data/README.md +2 -0
- data/ext/iceberg/Cargo.toml +34 -0
- data/ext/iceberg/src/arrow.rs +22 -0
- data/ext/iceberg/src/catalog.rs +315 -0
- data/ext/iceberg/src/error.rs +32 -0
- data/ext/iceberg/src/lib.rs +127 -0
- data/ext/iceberg/src/runtime.rs +33 -0
- data/ext/iceberg/src/scan.rs +47 -0
- data/ext/iceberg/src/table.rs +369 -0
- data/ext/iceberg/src/utils.rs +245 -0
- data/lib/iceberg/catalog.rb +6 -1
- data/lib/iceberg/table.rb +10 -6
- data/lib/iceberg/version.rb +1 -1
- metadata +12 -1
data/Cargo.toml
ADDED
data/README.md
CHANGED
@@ -0,0 +1,34 @@
|
|
1
|
+
[package]
|
2
|
+
name = "iceberg"
|
3
|
+
version = "0.10.1"
|
4
|
+
license = "Apache-2.0"
|
5
|
+
authors = ["Andrew Kane <andrew@ankane.org>"]
|
6
|
+
edition = "2024"
|
7
|
+
rust-version = "1.86"
|
8
|
+
publish = false
|
9
|
+
|
10
|
+
[lib]
|
11
|
+
crate-type = ["cdylib"]
|
12
|
+
|
13
|
+
[dependencies]
|
14
|
+
arrow-array = { version = "55", features = ["ffi"] }
|
15
|
+
arrow-schema = "55"
|
16
|
+
datafusion = { version = "47", optional = true }
|
17
|
+
futures = "0.3"
|
18
|
+
iceberg = "0.6"
|
19
|
+
iceberg-catalog-glue = { version = "0.6", optional = true }
|
20
|
+
iceberg-catalog-rest = { version = "0.6", optional = true }
|
21
|
+
iceberg-catalog-sql = { version = "0.6", optional = true }
|
22
|
+
iceberg-datafusion = { version = "0.6", optional = true }
|
23
|
+
magnus = "0.8"
|
24
|
+
parquet = "55"
|
25
|
+
sqlx = { version = "0.8", features = ["postgres", "runtime-tokio", "sqlite"], default-features = false, optional = true }
|
26
|
+
tokio = { version = "1", features = ["rt-multi-thread"] }
|
27
|
+
uuid = { version = "1", features = ["v4"] }
|
28
|
+
|
29
|
+
[features]
|
30
|
+
default = ["rest", "sql"]
|
31
|
+
datafusion = ["dep:datafusion", "dep:iceberg-datafusion"]
|
32
|
+
glue = ["dep:iceberg-catalog-glue"]
|
33
|
+
rest = ["dep:iceberg-catalog-rest"]
|
34
|
+
sql = ["dep:iceberg-catalog-sql", "dep:sqlx"]
|
@@ -0,0 +1,22 @@
|
|
1
|
+
use arrow_array::ffi_stream::{ArrowArrayStreamReader, FFI_ArrowArrayStream};
|
2
|
+
use magnus::{Error as RbErr, Ruby, TryConvert, Value, prelude::*};
|
3
|
+
|
4
|
+
use crate::RbResult;
|
5
|
+
|
6
|
+
/// Newtype wrapper around an Arrow value `T`, giving this crate a local type
/// on which to implement magnus conversion traits such as `TryConvert`.
pub struct RbArrowType<T>(pub T);
|
7
|
+
|
8
|
+
impl TryConvert for RbArrowType<ArrowArrayStreamReader> {
|
9
|
+
fn try_convert(val: Value) -> RbResult<Self> {
|
10
|
+
let ruby = Ruby::get_with(val);
|
11
|
+
let addr: usize = val.funcall("to_i", ())?;
|
12
|
+
|
13
|
+
// use similar approach as Polars to consume pointer and avoid copy
|
14
|
+
let stream_ptr =
|
15
|
+
Box::new(unsafe { std::ptr::replace(addr as _, FFI_ArrowArrayStream::empty()) });
|
16
|
+
|
17
|
+
Ok(RbArrowType(
|
18
|
+
ArrowArrayStreamReader::try_new(*stream_ptr)
|
19
|
+
.map_err(|e| RbErr::new(ruby.exception_arg_error(), e.to_string()))?,
|
20
|
+
))
|
21
|
+
}
|
22
|
+
}
|
@@ -0,0 +1,315 @@
|
|
1
|
+
#[cfg(feature = "datafusion")]
|
2
|
+
use datafusion::execution::context::SessionContext;
|
3
|
+
use iceberg::io::{FileIO, FileIOBuilder};
|
4
|
+
use iceberg::spec::Schema;
|
5
|
+
use iceberg::{Catalog, MemoryCatalog, NamespaceIdent, TableCreation, TableIdent};
|
6
|
+
#[cfg(feature = "glue")]
|
7
|
+
use iceberg_catalog_glue::{GlueCatalog, GlueCatalogConfig};
|
8
|
+
#[cfg(feature = "rest")]
|
9
|
+
use iceberg_catalog_rest::{RestCatalog, RestCatalogConfig};
|
10
|
+
#[cfg(feature = "sql")]
|
11
|
+
use iceberg_catalog_sql::{SqlBindStyle, SqlCatalog, SqlCatalogConfig};
|
12
|
+
#[cfg(feature = "datafusion")]
|
13
|
+
use iceberg_datafusion::IcebergCatalogProvider;
|
14
|
+
use std::cell::RefCell;
|
15
|
+
use std::collections::HashMap;
|
16
|
+
use std::sync::Arc;
|
17
|
+
|
18
|
+
use crate::error::to_rb_err;
|
19
|
+
use crate::runtime::runtime;
|
20
|
+
use crate::utils::Wrap;
|
21
|
+
use crate::{RbResult, RbTable};
|
22
|
+
|
23
|
+
pub enum RbCatalogType {
|
24
|
+
#[cfg(feature = "glue")]
|
25
|
+
Glue(Arc<GlueCatalog>),
|
26
|
+
Memory(Arc<MemoryCatalog>),
|
27
|
+
#[cfg(feature = "rest")]
|
28
|
+
Rest(Arc<RestCatalog>),
|
29
|
+
#[cfg(feature = "sql")]
|
30
|
+
Sql(Arc<SqlCatalog>),
|
31
|
+
}
|
32
|
+
|
33
|
+
impl RbCatalogType {
|
34
|
+
pub fn as_catalog(&self) -> &dyn Catalog {
|
35
|
+
match self {
|
36
|
+
#[cfg(feature = "glue")]
|
37
|
+
RbCatalogType::Glue(v) => v.as_ref(),
|
38
|
+
RbCatalogType::Memory(v) => v.as_ref(),
|
39
|
+
#[cfg(feature = "rest")]
|
40
|
+
RbCatalogType::Rest(v) => v.as_ref(),
|
41
|
+
#[cfg(feature = "sql")]
|
42
|
+
RbCatalogType::Sql(v) => v.as_ref(),
|
43
|
+
}
|
44
|
+
}
|
45
|
+
|
46
|
+
#[cfg(feature = "datafusion")]
|
47
|
+
fn as_arc(&self) -> Arc<dyn Catalog> {
|
48
|
+
match self {
|
49
|
+
#[cfg(feature = "glue")]
|
50
|
+
RbCatalogType::Glue(v) => v.clone(),
|
51
|
+
RbCatalogType::Memory(v) => v.clone(),
|
52
|
+
#[cfg(feature = "rest")]
|
53
|
+
RbCatalogType::Rest(v) => v.clone(),
|
54
|
+
#[cfg(feature = "sql")]
|
55
|
+
RbCatalogType::Sql(v) => v.clone(),
|
56
|
+
}
|
57
|
+
}
|
58
|
+
}
|
59
|
+
|
60
|
+
#[magnus::wrap(class = "Iceberg::RbCatalog")]
|
61
|
+
pub struct RbCatalog {
|
62
|
+
pub catalog: RefCell<RbCatalogType>,
|
63
|
+
}
|
64
|
+
|
65
|
+
impl RbCatalog {
|
66
|
+
#[cfg(feature = "glue")]
|
67
|
+
pub fn new_glue(warehouse: String) -> RbResult<Self> {
|
68
|
+
let config = GlueCatalogConfig::builder().warehouse(warehouse).build();
|
69
|
+
let catalog = runtime()
|
70
|
+
.block_on(GlueCatalog::new(config))
|
71
|
+
.map_err(to_rb_err)?;
|
72
|
+
Ok(Self {
|
73
|
+
catalog: RbCatalogType::Glue(catalog.into()).into(),
|
74
|
+
})
|
75
|
+
}
|
76
|
+
|
77
|
+
pub fn new_memory(warehouse: Option<String>) -> RbResult<Self> {
|
78
|
+
let file_io = match warehouse {
|
79
|
+
Some(ref v) => FileIO::from_path(v)
|
80
|
+
.map_err(to_rb_err)?
|
81
|
+
.build()
|
82
|
+
.map_err(to_rb_err)?,
|
83
|
+
None => FileIOBuilder::new_fs_io().build().map_err(to_rb_err)?,
|
84
|
+
};
|
85
|
+
let catalog = MemoryCatalog::new(file_io, warehouse);
|
86
|
+
Ok(Self {
|
87
|
+
catalog: RbCatalogType::Memory(catalog.into()).into(),
|
88
|
+
})
|
89
|
+
}
|
90
|
+
|
91
|
+
#[cfg(feature = "rest")]
|
92
|
+
pub fn new_rest(
|
93
|
+
uri: String,
|
94
|
+
warehouse: Option<String>,
|
95
|
+
props: HashMap<String, String>,
|
96
|
+
) -> Self {
|
97
|
+
let config = RestCatalogConfig::builder()
|
98
|
+
.uri(uri)
|
99
|
+
.warehouse_opt(warehouse)
|
100
|
+
.props(props)
|
101
|
+
.build();
|
102
|
+
let catalog = RestCatalog::new(config);
|
103
|
+
Self {
|
104
|
+
catalog: RbCatalogType::Rest(catalog.into()).into(),
|
105
|
+
}
|
106
|
+
}
|
107
|
+
|
108
|
+
#[cfg(feature = "sql")]
|
109
|
+
pub fn new_sql(
|
110
|
+
uri: String,
|
111
|
+
warehouse: String,
|
112
|
+
name: String,
|
113
|
+
props: HashMap<String, String>,
|
114
|
+
) -> RbResult<Self> {
|
115
|
+
let file_io = FileIO::from_path(&warehouse)
|
116
|
+
.map_err(to_rb_err)?
|
117
|
+
.build()
|
118
|
+
.map_err(to_rb_err)?;
|
119
|
+
let config = SqlCatalogConfig::builder()
|
120
|
+
.uri(uri)
|
121
|
+
.warehouse_location(warehouse)
|
122
|
+
.name(name)
|
123
|
+
.file_io(file_io)
|
124
|
+
.sql_bind_style(SqlBindStyle::DollarNumeric)
|
125
|
+
.props(props)
|
126
|
+
.build();
|
127
|
+
let catalog = runtime()
|
128
|
+
.block_on(SqlCatalog::new(config))
|
129
|
+
.map_err(to_rb_err)?;
|
130
|
+
Ok(Self {
|
131
|
+
catalog: RbCatalogType::Sql(catalog.into()).into(),
|
132
|
+
})
|
133
|
+
}
|
134
|
+
|
135
|
+
pub fn list_namespaces(
|
136
|
+
&self,
|
137
|
+
parent: Option<Wrap<NamespaceIdent>>,
|
138
|
+
) -> RbResult<Vec<Vec<String>>> {
|
139
|
+
let namespaces = runtime()
|
140
|
+
.block_on(
|
141
|
+
self.catalog
|
142
|
+
.borrow()
|
143
|
+
.as_catalog()
|
144
|
+
.list_namespaces(parent.map(|v| v.0).as_ref()),
|
145
|
+
)
|
146
|
+
.map_err(to_rb_err)?;
|
147
|
+
Ok(namespaces.iter().map(|v| v.clone().inner()).collect())
|
148
|
+
}
|
149
|
+
|
150
|
+
pub fn create_namespace(
|
151
|
+
&self,
|
152
|
+
name: Wrap<NamespaceIdent>,
|
153
|
+
props: HashMap<String, String>,
|
154
|
+
) -> RbResult<()> {
|
155
|
+
runtime()
|
156
|
+
.block_on(
|
157
|
+
self.catalog
|
158
|
+
.borrow()
|
159
|
+
.as_catalog()
|
160
|
+
.create_namespace(&name.0, props),
|
161
|
+
)
|
162
|
+
.map_err(to_rb_err)?;
|
163
|
+
Ok(())
|
164
|
+
}
|
165
|
+
|
166
|
+
pub fn namespace_exists(&self, name: Wrap<NamespaceIdent>) -> RbResult<bool> {
|
167
|
+
let exists = runtime()
|
168
|
+
.block_on(self.catalog.borrow().as_catalog().namespace_exists(&name.0))
|
169
|
+
.map_err(to_rb_err)?;
|
170
|
+
Ok(exists)
|
171
|
+
}
|
172
|
+
|
173
|
+
pub fn namespace_properties(
|
174
|
+
&self,
|
175
|
+
name: Wrap<NamespaceIdent>,
|
176
|
+
) -> RbResult<HashMap<String, String>> {
|
177
|
+
let namespace = runtime()
|
178
|
+
.block_on(self.catalog.borrow().as_catalog().get_namespace(&name.0))
|
179
|
+
.map_err(to_rb_err)?;
|
180
|
+
Ok(namespace.properties().clone())
|
181
|
+
}
|
182
|
+
|
183
|
+
pub fn update_namespace(
|
184
|
+
&self,
|
185
|
+
name: Wrap<NamespaceIdent>,
|
186
|
+
props: HashMap<String, String>,
|
187
|
+
) -> RbResult<()> {
|
188
|
+
runtime()
|
189
|
+
.block_on(
|
190
|
+
self.catalog
|
191
|
+
.borrow()
|
192
|
+
.as_catalog()
|
193
|
+
.update_namespace(&name.0, props),
|
194
|
+
)
|
195
|
+
.map_err(to_rb_err)?;
|
196
|
+
Ok(())
|
197
|
+
}
|
198
|
+
|
199
|
+
pub fn drop_namespace(&self, name: Wrap<NamespaceIdent>) -> RbResult<()> {
|
200
|
+
runtime()
|
201
|
+
.block_on(self.catalog.borrow().as_catalog().drop_namespace(&name.0))
|
202
|
+
.map_err(to_rb_err)?;
|
203
|
+
Ok(())
|
204
|
+
}
|
205
|
+
|
206
|
+
pub fn list_tables(&self, namespace: Wrap<NamespaceIdent>) -> RbResult<Vec<Vec<String>>> {
|
207
|
+
let tables = runtime()
|
208
|
+
.block_on(self.catalog.borrow().as_catalog().list_tables(&namespace.0))
|
209
|
+
.map_err(to_rb_err)?;
|
210
|
+
Ok(tables
|
211
|
+
.iter()
|
212
|
+
.map(|v| {
|
213
|
+
let mut vec = v.namespace.clone().inner();
|
214
|
+
vec.push(v.name.clone());
|
215
|
+
vec
|
216
|
+
})
|
217
|
+
.collect())
|
218
|
+
}
|
219
|
+
|
220
|
+
pub fn create_table(
|
221
|
+
&self,
|
222
|
+
name: Wrap<TableIdent>,
|
223
|
+
schema: Wrap<Schema>,
|
224
|
+
location: Option<String>,
|
225
|
+
) -> RbResult<RbTable> {
|
226
|
+
let creation = TableCreation::builder()
|
227
|
+
.name(name.0.name)
|
228
|
+
.schema(schema.0)
|
229
|
+
.location_opt(location)
|
230
|
+
.build();
|
231
|
+
let table = runtime()
|
232
|
+
.block_on(
|
233
|
+
self.catalog
|
234
|
+
.borrow()
|
235
|
+
.as_catalog()
|
236
|
+
.create_table(&name.0.namespace, creation),
|
237
|
+
)
|
238
|
+
.map_err(to_rb_err)?;
|
239
|
+
Ok(RbTable {
|
240
|
+
table: table.into(),
|
241
|
+
})
|
242
|
+
}
|
243
|
+
|
244
|
+
pub fn load_table(&self, name: Wrap<TableIdent>) -> RbResult<RbTable> {
|
245
|
+
let table = runtime()
|
246
|
+
.block_on(self.catalog.borrow().as_catalog().load_table(&name.0))
|
247
|
+
.map_err(to_rb_err)?;
|
248
|
+
Ok(RbTable {
|
249
|
+
table: table.into(),
|
250
|
+
})
|
251
|
+
}
|
252
|
+
|
253
|
+
pub fn drop_table(&self, name: Wrap<TableIdent>) -> RbResult<()> {
|
254
|
+
runtime()
|
255
|
+
.block_on(self.catalog.borrow().as_catalog().drop_table(&name.0))
|
256
|
+
.map_err(to_rb_err)?;
|
257
|
+
Ok(())
|
258
|
+
}
|
259
|
+
|
260
|
+
pub fn table_exists(&self, name: Wrap<TableIdent>) -> RbResult<bool> {
|
261
|
+
let exists = runtime()
|
262
|
+
.block_on(self.catalog.borrow().as_catalog().table_exists(&name.0))
|
263
|
+
.map_err(to_rb_err)?;
|
264
|
+
Ok(exists)
|
265
|
+
}
|
266
|
+
|
267
|
+
pub fn rename_table(&self, name: Wrap<TableIdent>, new_name: Wrap<TableIdent>) -> RbResult<()> {
|
268
|
+
runtime()
|
269
|
+
.block_on(
|
270
|
+
self.catalog
|
271
|
+
.borrow()
|
272
|
+
.as_catalog()
|
273
|
+
.rename_table(&name.0, &new_name.0),
|
274
|
+
)
|
275
|
+
.map_err(to_rb_err)?;
|
276
|
+
Ok(())
|
277
|
+
}
|
278
|
+
|
279
|
+
pub fn register_table(
|
280
|
+
&self,
|
281
|
+
name: Wrap<TableIdent>,
|
282
|
+
metadata_location: String,
|
283
|
+
) -> RbResult<()> {
|
284
|
+
runtime()
|
285
|
+
.block_on(
|
286
|
+
self.catalog
|
287
|
+
.borrow()
|
288
|
+
.as_catalog()
|
289
|
+
.register_table(&name.0, metadata_location),
|
290
|
+
)
|
291
|
+
.map_err(to_rb_err)?;
|
292
|
+
Ok(())
|
293
|
+
}
|
294
|
+
|
295
|
+
#[cfg(feature = "datafusion")]
|
296
|
+
pub fn query(&self, sql: String) -> RbResult<()> {
|
297
|
+
let runtime = runtime();
|
298
|
+
|
299
|
+
// TODO only create context once
|
300
|
+
let catalog = self.catalog.borrow().as_arc();
|
301
|
+
let provider = runtime
|
302
|
+
.block_on(IcebergCatalogProvider::try_new(catalog))
|
303
|
+
.unwrap();
|
304
|
+
let ctx = SessionContext::new();
|
305
|
+
ctx.register_catalog("datafusion", Arc::new(provider));
|
306
|
+
|
307
|
+
let df = runtime.block_on(ctx.sql(&sql)).unwrap();
|
308
|
+
let _results = runtime.block_on(df.collect()).unwrap();
|
309
|
+
|
310
|
+
// println!("{:?}", df.schema().fields());
|
311
|
+
// println!("{:?}", results);
|
312
|
+
|
313
|
+
Ok(())
|
314
|
+
}
|
315
|
+
}
|
@@ -0,0 +1,32 @@
|
|
1
|
+
use magnus::{Error as RbErr, RModule, Ruby, prelude::*};
|
2
|
+
|
3
|
+
pub fn to_rb_err(err: iceberg::Error) -> RbErr {
|
4
|
+
let class_name = match err.kind() {
|
5
|
+
iceberg::ErrorKind::NamespaceAlreadyExists => "NamespaceAlreadyExistsError",
|
6
|
+
iceberg::ErrorKind::NamespaceNotFound => "NamespaceNotFoundError",
|
7
|
+
iceberg::ErrorKind::TableAlreadyExists => "TableAlreadyExistsError",
|
8
|
+
iceberg::ErrorKind::TableNotFound => "TableNotFoundError",
|
9
|
+
iceberg::ErrorKind::FeatureUnsupported => "UnsupportedFeatureError",
|
10
|
+
iceberg::ErrorKind::DataInvalid => "InvalidDataError",
|
11
|
+
_ => "Error",
|
12
|
+
};
|
13
|
+
|
14
|
+
let class = Ruby::get()
|
15
|
+
.unwrap()
|
16
|
+
.class_object()
|
17
|
+
.const_get::<_, RModule>("Iceberg")
|
18
|
+
.unwrap()
|
19
|
+
.const_get(class_name)
|
20
|
+
.unwrap();
|
21
|
+
|
22
|
+
// no way to get context separately
|
23
|
+
// https://github.com/apache/iceberg-rust/issues/1071
|
24
|
+
let message = err.to_string();
|
25
|
+
let message = message
|
26
|
+
// TODO improve
|
27
|
+
.strip_prefix("Unexpected => ")
|
28
|
+
.map(|v| v.to_string())
|
29
|
+
.unwrap_or(message);
|
30
|
+
|
31
|
+
RbErr::new(class, message)
|
32
|
+
}
|
@@ -0,0 +1,127 @@
|
|
1
|
+
mod arrow;
|
2
|
+
mod catalog;
|
3
|
+
mod error;
|
4
|
+
mod runtime;
|
5
|
+
mod scan;
|
6
|
+
mod table;
|
7
|
+
mod utils;
|
8
|
+
|
9
|
+
use magnus::{Error as RbErr, Ruby, function, method, prelude::*};
|
10
|
+
|
11
|
+
use crate::catalog::RbCatalog;
|
12
|
+
use crate::scan::RbTableScan;
|
13
|
+
use crate::table::RbTable;
|
14
|
+
|
15
|
+
/// Result alias used throughout the extension; the error type is magnus's
/// `Error` (imported here as `RbErr`).
type RbResult<T> = Result<T, RbErr>;
|
16
|
+
|
17
|
+
#[magnus::init]
|
18
|
+
fn init(ruby: &Ruby) -> RbResult<()> {
|
19
|
+
let module = ruby.define_module("Iceberg")?;
|
20
|
+
|
21
|
+
let class = module.define_class("RbCatalog", ruby.class_object())?;
|
22
|
+
#[cfg(feature = "glue")]
|
23
|
+
class.define_singleton_method("new_glue", function!(RbCatalog::new_glue, 1))?;
|
24
|
+
class.define_singleton_method("new_memory", function!(RbCatalog::new_memory, 1))?;
|
25
|
+
#[cfg(feature = "rest")]
|
26
|
+
class.define_singleton_method("new_rest", function!(RbCatalog::new_rest, 3))?;
|
27
|
+
#[cfg(feature = "sql")]
|
28
|
+
class.define_singleton_method("new_sql", function!(RbCatalog::new_sql, 4))?;
|
29
|
+
class.define_method("list_namespaces", method!(RbCatalog::list_namespaces, 1))?;
|
30
|
+
class.define_method("create_namespace", method!(RbCatalog::create_namespace, 2))?;
|
31
|
+
class.define_method("namespace_exists?", method!(RbCatalog::namespace_exists, 1))?;
|
32
|
+
class.define_method(
|
33
|
+
"namespace_properties",
|
34
|
+
method!(RbCatalog::namespace_properties, 1),
|
35
|
+
)?;
|
36
|
+
class.define_method("update_namespace", method!(RbCatalog::update_namespace, 2))?;
|
37
|
+
class.define_method("drop_namespace", method!(RbCatalog::drop_namespace, 1))?;
|
38
|
+
class.define_method("list_tables", method!(RbCatalog::list_tables, 1))?;
|
39
|
+
class.define_method("create_table", method!(RbCatalog::create_table, 3))?;
|
40
|
+
class.define_method("load_table", method!(RbCatalog::load_table, 1))?;
|
41
|
+
class.define_method("drop_table", method!(RbCatalog::drop_table, 1))?;
|
42
|
+
class.define_method("table_exists?", method!(RbCatalog::table_exists, 1))?;
|
43
|
+
class.define_method("rename_table", method!(RbCatalog::rename_table, 2))?;
|
44
|
+
class.define_method("register_table", method!(RbCatalog::register_table, 2))?;
|
45
|
+
#[cfg(feature = "datafusion")]
|
46
|
+
class.define_method("query", method!(RbCatalog::query, 1))?;
|
47
|
+
|
48
|
+
let class = module.define_class("RbTable", ruby.class_object())?;
|
49
|
+
class.define_method("scan", method!(RbTable::scan, 1))?;
|
50
|
+
class.define_method("append", method!(RbTable::append, 2))?;
|
51
|
+
class.define_method("format_version", method!(RbTable::format_version, 0))?;
|
52
|
+
class.define_method("uuid", method!(RbTable::uuid, 0))?;
|
53
|
+
class.define_method("location", method!(RbTable::location, 0))?;
|
54
|
+
class.define_method(
|
55
|
+
"last_sequence_number",
|
56
|
+
method!(RbTable::last_sequence_number, 0),
|
57
|
+
)?;
|
58
|
+
class.define_method(
|
59
|
+
"next_sequence_number",
|
60
|
+
method!(RbTable::next_sequence_number, 0),
|
61
|
+
)?;
|
62
|
+
class.define_method("last_column_id", method!(RbTable::last_column_id, 0))?;
|
63
|
+
class.define_method("last_partition_id", method!(RbTable::last_partition_id, 0))?;
|
64
|
+
class.define_method("last_updated_ms", method!(RbTable::last_updated_ms, 0))?;
|
65
|
+
class.define_method("schemas", method!(RbTable::schemas, 0))?;
|
66
|
+
class.define_method("schema_by_id", method!(RbTable::schema_by_id, 1))?;
|
67
|
+
class.define_method("current_schema", method!(RbTable::current_schema, 0))?;
|
68
|
+
class.define_method("current_schema_id", method!(RbTable::current_schema_id, 0))?;
|
69
|
+
class.define_method("partition_specs", method!(RbTable::partition_specs, 0))?;
|
70
|
+
class.define_method(
|
71
|
+
"partition_spec_by_id",
|
72
|
+
method!(RbTable::partition_spec_by_id, 1),
|
73
|
+
)?;
|
74
|
+
class.define_method(
|
75
|
+
"default_partition_spec",
|
76
|
+
method!(RbTable::default_partition_spec, 0),
|
77
|
+
)?;
|
78
|
+
class.define_method(
|
79
|
+
"default_partition_spec_id",
|
80
|
+
method!(RbTable::default_partition_spec_id, 0),
|
81
|
+
)?;
|
82
|
+
class.define_method("snapshots", method!(RbTable::snapshots, 0))?;
|
83
|
+
class.define_method("snapshot_by_id", method!(RbTable::snapshot_by_id, 1))?;
|
84
|
+
class.define_method("history", method!(RbTable::history, 0))?;
|
85
|
+
class.define_method("metadata_log", method!(RbTable::metadata_log, 0))?;
|
86
|
+
class.define_method("current_snapshot", method!(RbTable::current_snapshot, 0))?;
|
87
|
+
class.define_method(
|
88
|
+
"current_snapshot_id",
|
89
|
+
method!(RbTable::current_snapshot_id, 0),
|
90
|
+
)?;
|
91
|
+
class.define_method("snapshot_for_ref", method!(RbTable::snapshot_for_ref, 1))?;
|
92
|
+
class.define_method("sort_orders", method!(RbTable::sort_orders, 0))?;
|
93
|
+
class.define_method("sort_order_by_id", method!(RbTable::sort_order_by_id, 1))?;
|
94
|
+
class.define_method(
|
95
|
+
"default_sort_order",
|
96
|
+
method!(RbTable::default_sort_order, 0),
|
97
|
+
)?;
|
98
|
+
class.define_method(
|
99
|
+
"default_sort_order_id",
|
100
|
+
method!(RbTable::default_sort_order_id, 0),
|
101
|
+
)?;
|
102
|
+
class.define_method("properties", method!(RbTable::properties, 0))?;
|
103
|
+
class.define_method("statistics", method!(RbTable::statistics, 0))?;
|
104
|
+
class.define_method(
|
105
|
+
"partition_statistics",
|
106
|
+
method!(RbTable::partition_statistics, 0),
|
107
|
+
)?;
|
108
|
+
class.define_method(
|
109
|
+
"statistics_for_snapshot",
|
110
|
+
method!(RbTable::statistics_for_snapshot, 1),
|
111
|
+
)?;
|
112
|
+
class.define_method(
|
113
|
+
"partition_statistics_for_snapshot",
|
114
|
+
method!(RbTable::partition_statistics_for_snapshot, 1),
|
115
|
+
)?;
|
116
|
+
class.define_method("encryption_keys", method!(RbTable::encryption_keys, 0))?;
|
117
|
+
class.define_method("encryption_key", method!(RbTable::encryption_key, 1))?;
|
118
|
+
class.define_singleton_method(
|
119
|
+
"from_metadata_file",
|
120
|
+
function!(RbTable::from_metadata_file, 1),
|
121
|
+
)?;
|
122
|
+
|
123
|
+
let class = module.define_class("RbTableScan", ruby.class_object())?;
|
124
|
+
class.define_method("plan_files", method!(RbTableScan::plan_files, 0))?;
|
125
|
+
|
126
|
+
Ok(())
|
127
|
+
}
|
@@ -0,0 +1,33 @@
|
|
1
|
+
// Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
// or more contributor license agreements. See the NOTICE file
|
3
|
+
// distributed with this work for additional information
|
4
|
+
// regarding copyright ownership. The ASF licenses this file
|
5
|
+
// to you under the Apache License, Version 2.0 (the
|
6
|
+
// "License"); you may not use this file except in compliance
|
7
|
+
// with the License. You may obtain a copy of the License at
|
8
|
+
//
|
9
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
//
|
11
|
+
// Unless required by applicable law or agreed to in writing,
|
12
|
+
// software distributed under the License is distributed on an
|
13
|
+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
// KIND, either express or implied. See the License for the
|
15
|
+
// specific language governing permissions and limitations
|
16
|
+
// under the License.
|
17
|
+
|
18
|
+
use std::sync::OnceLock;
|
19
|
+
|
20
|
+
use tokio::runtime::{Handle, Runtime};
|
21
|
+
|
22
|
+
/// Process-wide Tokio runtime, created on first use by `runtime()` when the
/// calling thread has no current runtime.
static RUNTIME: OnceLock<Runtime> = OnceLock::new();
|
23
|
+
|
24
|
+
// TODO https://github.com/apache/iceberg-rust/pull/1396
|
25
|
+
pub fn runtime() -> Handle {
|
26
|
+
match Handle::try_current() {
|
27
|
+
Ok(h) => h.clone(),
|
28
|
+
_ => {
|
29
|
+
let rt = RUNTIME.get_or_init(|| Runtime::new().unwrap());
|
30
|
+
rt.handle().clone()
|
31
|
+
}
|
32
|
+
}
|
33
|
+
}
|
@@ -0,0 +1,47 @@
|
|
1
|
+
use futures::TryStreamExt;
|
2
|
+
use iceberg::scan::TableScan;
|
3
|
+
use magnus::{RArray, Ruby};
|
4
|
+
use std::cell::RefCell;
|
5
|
+
|
6
|
+
use crate::RbResult;
|
7
|
+
use crate::error::to_rb_err;
|
8
|
+
use crate::runtime::runtime;
|
9
|
+
|
10
|
+
#[magnus::wrap(class = "Iceberg::RbTableScan")]
|
11
|
+
pub struct RbTableScan {
|
12
|
+
pub scan: RefCell<TableScan>,
|
13
|
+
}
|
14
|
+
|
15
|
+
impl RbTableScan {
|
16
|
+
pub fn plan_files(ruby: &Ruby, rb_self: &Self) -> RbResult<RArray> {
|
17
|
+
let scan = rb_self.scan.borrow();
|
18
|
+
|
19
|
+
let runtime = runtime();
|
20
|
+
let plan_files = runtime.block_on(scan.plan_files()).map_err(to_rb_err)?;
|
21
|
+
let plan_files: Vec<_> = runtime
|
22
|
+
.block_on(plan_files.try_collect())
|
23
|
+
.map_err(to_rb_err)?;
|
24
|
+
let files = ruby.ary_new();
|
25
|
+
for v in plan_files {
|
26
|
+
let file = ruby.hash_new();
|
27
|
+
file.aset(ruby.to_symbol("start"), v.start)?;
|
28
|
+
file.aset(ruby.to_symbol("length"), v.length)?;
|
29
|
+
file.aset(ruby.to_symbol("record_count"), v.record_count)?;
|
30
|
+
file.aset(ruby.to_symbol("data_file_path"), v.data_file_path)?;
|
31
|
+
file.aset(ruby.to_symbol("project_field_ids"), v.project_field_ids)?;
|
32
|
+
|
33
|
+
let deletes = ruby.ary_new();
|
34
|
+
for d in v.deletes {
|
35
|
+
let delete = ruby.hash_new();
|
36
|
+
delete.aset(ruby.to_symbol("file_path"), d.file_path)?;
|
37
|
+
delete.aset(ruby.to_symbol("partition_spec_id"), d.partition_spec_id)?;
|
38
|
+
delete.aset(ruby.to_symbol("equality_ids"), d.equality_ids)?;
|
39
|
+
deletes.push(delete)?;
|
40
|
+
}
|
41
|
+
file.aset(ruby.to_symbol("deletes"), deletes)?;
|
42
|
+
|
43
|
+
files.push(file)?;
|
44
|
+
}
|
45
|
+
Ok(files)
|
46
|
+
}
|
47
|
+
}
|