rbcsv 0.1.4 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rust-analyzer.json +8 -0
- data/CHANGELOG.md +50 -0
- data/Cargo.lock +123 -44
- data/DEVELOPMENT.md +491 -0
- data/ext/rbcsv/Cargo.toml +6 -1
- data/ext/rbcsv/src/error.rs +87 -0
- data/ext/rbcsv/src/lib.rs +12 -62
- data/ext/rbcsv/src/parser.rs +187 -0
- data/ext/rbcsv/src/ruby_api.rs +98 -0
- data/lib/rbcsv/version.rb +1 -1
- metadata +7 -2
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
use crate::error::{CsvError, ErrorKind};
|
|
2
|
+
use std::fs;
|
|
3
|
+
use std::path::Path;
|
|
4
|
+
|
|
5
|
+
/// Options controlling how CSV input is parsed.
///
/// The derived [`Default`] yields `trim: false`, which is the same value the
/// previous hand-written implementation produced.
#[derive(Debug, Clone, Default)]
pub struct CsvParseOptions {
    /// When `true`, the option-based parse functions select `csv::Trim::All`;
    /// when `false`, they select `csv::Trim::None`.
    pub trim: bool,
    // Reserved for future extension:
    // pub headers: bool,
    // pub delimiter: char,
}
|
|
21
|
+
|
|
22
|
+
/// Converts backslash escape sequences in `s` into the characters they denote.
///
/// Recognized sequences are `\n`, `\r`, `\t`, `\"` and `\\`. The input is
/// decoded in a single left-to-right pass, so the character produced by one
/// escape is never re-read as the start of another. In particular an escaped
/// backslash followed by `n` decodes to a backslash and the letter `n`, not to
/// a backslash and a newline.
///
/// A backslash followed by any other character, or a trailing backslash, is
/// copied through unchanged.
pub fn escape_sanitize(s: &str) -> String {
    let mut decoded = String::with_capacity(s.len());
    let mut chars = s.chars().peekable();

    while let Some(current) = chars.next() {
        if current != '\\' {
            decoded.push(current);
            continue;
        }

        // Look at the character after the backslash without consuming it yet,
        // so that unknown sequences can be emitted verbatim.
        let replacement = match chars.peek().copied() {
            Some('n') => Some('\n'),
            Some('r') => Some('\r'),
            Some('t') => Some('\t'),
            Some('"') => Some('"'),
            Some('\\') => Some('\\'),
            _ => None,
        };

        match replacement {
            Some(ch) => {
                decoded.push(ch);
                // Consume the second character of the recognized sequence.
                chars.next();
            }
            None => decoded.push('\\'),
        }
    }

    decoded
}
|
|
30
|
+
|
|
31
|
+
/// 基本的なCSVパース処理
|
|
32
|
+
pub fn parse_csv_core(input: &str, trim_config: csv::Trim) -> Result<Vec<Vec<String>>, CsvError> {
|
|
33
|
+
// 空のデータチェック
|
|
34
|
+
if input.trim().is_empty() {
|
|
35
|
+
return Err(CsvError::empty_data());
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
// エスケープシーケンスを実際の文字に変換
|
|
39
|
+
let processed = escape_sanitize(input);
|
|
40
|
+
|
|
41
|
+
let mut reader = csv::ReaderBuilder::new()
|
|
42
|
+
.has_headers(false) // ヘッダーを無効にして、すべての行を読み込む
|
|
43
|
+
.trim(trim_config)
|
|
44
|
+
.from_reader(processed.as_bytes());
|
|
45
|
+
|
|
46
|
+
let mut records = Vec::new();
|
|
47
|
+
|
|
48
|
+
for (line_num, result) in reader.records().enumerate() {
|
|
49
|
+
match result {
|
|
50
|
+
Ok(record) => {
|
|
51
|
+
let row: Vec<String> = record.iter().map(|field| field.to_string()).collect();
|
|
52
|
+
records.push(row);
|
|
53
|
+
}
|
|
54
|
+
Err(e) => {
|
|
55
|
+
// フィールド数不一致エラーを詳細化
|
|
56
|
+
if let csv::ErrorKind::UnequalLengths { expected_len, len, .. } = e.kind() {
|
|
57
|
+
let error_msg = format!(
|
|
58
|
+
"Field count mismatch at line {}: expected {} fields, got {} fields",
|
|
59
|
+
line_num + 1,
|
|
60
|
+
expected_len,
|
|
61
|
+
len
|
|
62
|
+
);
|
|
63
|
+
return Err(CsvError::new(ErrorKind::FieldCountMismatch, error_msg));
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
// その他のcsvエラーを自動変換
|
|
67
|
+
return Err(CsvError::from(e));
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
if records.is_empty() {
|
|
73
|
+
return Err(CsvError::empty_data());
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
Ok(records)
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
/// オプション設定を使ったCSV解析(文字列用)
|
|
80
|
+
pub fn parse_csv_with_options(input: &str, options: &CsvParseOptions) -> Result<Vec<Vec<String>>, CsvError> {
|
|
81
|
+
let trim_config = if options.trim { csv::Trim::All } else { csv::Trim::None };
|
|
82
|
+
parse_csv_core(input, trim_config)
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
/// オプション設定を使ったCSV解析(ファイル用)
|
|
86
|
+
pub fn parse_csv_file_with_options(file_path: &str, options: &CsvParseOptions) -> Result<Vec<Vec<String>>, CsvError> {
|
|
87
|
+
let trim_config = if options.trim { csv::Trim::All } else { csv::Trim::None };
|
|
88
|
+
parse_csv_file(file_path, trim_config)
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
/// ファイルからCSVを読み込んでパースする
|
|
92
|
+
pub fn parse_csv_file(file_path: &str, trim_config: csv::Trim) -> Result<Vec<Vec<String>>, CsvError> {
|
|
93
|
+
// ファイルパスの検証
|
|
94
|
+
let path = Path::new(file_path);
|
|
95
|
+
if !path.exists() {
|
|
96
|
+
return Err(CsvError::io(format!("File not found: {}", file_path)));
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
if !path.is_file() {
|
|
100
|
+
return Err(CsvError::io(format!("Path is not a file: {}", file_path)));
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
// ファイル読み込み
|
|
104
|
+
let content = match fs::read_to_string(path) {
|
|
105
|
+
Ok(content) => content,
|
|
106
|
+
Err(e) => {
|
|
107
|
+
return Err(CsvError::io(format!("Failed to read file '{}': {}", file_path, e)));
|
|
108
|
+
}
|
|
109
|
+
};
|
|
110
|
+
|
|
111
|
+
// CSVパース
|
|
112
|
+
parse_csv_core(&content, trim_config)
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
#[cfg(test)]
mod tests {
    use super::*;

    /// Each supported escape sequence is decoded to its character.
    #[test]
    fn test_escape_sanitize() {
        let input = "Hello\\nWorld\\t\\\"Test\\\"\\\\End";
        let expected = "Hello\nWorld\t\"Test\"\\End";
        assert_eq!(escape_sanitize(input), expected);
    }

    /// Two well-formed rows are returned in order.
    #[test]
    fn test_parse_csv_core_basic() {
        let csv_data = "a,b,c\n1,2,3";
        let result = parse_csv_core(csv_data, csv::Trim::None);

        assert!(result.is_ok());
        let records = result.unwrap();
        assert_eq!(records.len(), 2);
        assert_eq!(records[0], vec!["a", "b", "c"]);
        assert_eq!(records[1], vec!["1", "2", "3"]);
    }

    /// A missing path is reported as "File not found".
    #[test]
    fn test_parse_csv_file_not_found() {
        let result = parse_csv_file("non_existent_file.csv", csv::Trim::None);

        assert!(result.is_err());
        if let Err(e) = result {
            assert!(e.to_string().contains("File not found"));
        }
    }

    /// Passing a directory is reported as "Path is not a file".
    #[test]
    fn test_parse_csv_file_directory() {
        let result = parse_csv_file(".", csv::Trim::None);

        assert!(result.is_err());
        if let Err(e) = result {
            assert!(e.to_string().contains("Path is not a file"));
        }
    }

    /// A file written to the temp directory is read back and parsed.
    #[test]
    fn test_parse_csv_file_with_temp_file() {
        // Use the platform temp directory and a per-process file name so the
        // test is portable and does not collide with concurrent test runs.
        let temp_path = std::env::temp_dir()
            .join(format!("rbcsv_test_csv_file_{}.csv", std::process::id()));
        let temp_path_str = temp_path
            .to_str()
            .expect("Temp path is not valid UTF-8");
        let csv_content = "name,age,city\nAlice,25,Tokyo\nBob,30,Osaka";

        // Create the temporary file.
        std::fs::write(&temp_path, csv_content).expect("Failed to write to temp file");

        // Read the CSV back from the file.
        let result = parse_csv_file(temp_path_str, csv::Trim::None);

        // Clean up before asserting so a failure does not leave the file behind.
        let _ = std::fs::remove_file(&temp_path);

        // Verify the result.
        assert!(result.is_ok());
        let records = result.unwrap();
        assert_eq!(records.len(), 3);
        assert_eq!(records[0], vec!["name", "age", "city"]);
        assert_eq!(records[1], vec!["Alice", "25", "Tokyo"]);
        assert_eq!(records[2], vec!["Bob", "30", "Osaka"]);
    }
}
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
use magnus::{Error as MagnusError, Ruby};
|
|
2
|
+
use crate::parser::{parse_csv_core, parse_csv_file};
|
|
3
|
+
|
|
4
|
+
/// CSV文字列をパースする(通常版)
|
|
5
|
+
///
|
|
6
|
+
/// # Arguments
|
|
7
|
+
/// * `ruby` - Ruby VMの参照
|
|
8
|
+
/// * `s` - パースするCSV文字列
|
|
9
|
+
///
|
|
10
|
+
/// # Returns
|
|
11
|
+
/// * `Result<Vec<Vec<String>>, MagnusError>` - パース結果またはエラー
|
|
12
|
+
pub fn parse(ruby: &Ruby, s: String) -> Result<Vec<Vec<String>>, MagnusError> {
|
|
13
|
+
parse_csv_core(&s, csv::Trim::None)
|
|
14
|
+
.map_err(|e| MagnusError::new(ruby.exception_runtime_error(), e.to_string()))
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
/// CSV文字列をパースする(trim版)
|
|
18
|
+
///
|
|
19
|
+
/// # Arguments
|
|
20
|
+
/// * `ruby` - Ruby VMの参照
|
|
21
|
+
/// * `s` - パースするCSV文字列
|
|
22
|
+
///
|
|
23
|
+
/// # Returns
|
|
24
|
+
/// * `Result<Vec<Vec<String>>, MagnusError>` - パース結果またはエラー
|
|
25
|
+
pub fn parse_trim(ruby: &Ruby, s: String) -> Result<Vec<Vec<String>>, MagnusError> {
|
|
26
|
+
parse_csv_core(&s, csv::Trim::All)
|
|
27
|
+
.map_err(|e| MagnusError::new(ruby.exception_runtime_error(), e.to_string()))
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
/// CSVファイルを読み込む(通常版)
|
|
31
|
+
///
|
|
32
|
+
/// # Arguments
|
|
33
|
+
/// * `ruby` - Ruby VMの参照
|
|
34
|
+
/// * `file_path` - 読み込むCSVファイルのパス
|
|
35
|
+
///
|
|
36
|
+
/// # Returns
|
|
37
|
+
/// * `Result<Vec<Vec<String>>, MagnusError>` - パース結果またはエラー
|
|
38
|
+
pub fn read(ruby: &Ruby, file_path: String) -> Result<Vec<Vec<String>>, MagnusError> {
|
|
39
|
+
parse_csv_file(&file_path, csv::Trim::None)
|
|
40
|
+
.map_err(|e| MagnusError::new(ruby.exception_runtime_error(), e.to_string()))
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
/// CSVファイルを読み込む(trim版)
|
|
44
|
+
///
|
|
45
|
+
/// # Arguments
|
|
46
|
+
/// * `ruby` - Ruby VMの参照
|
|
47
|
+
/// * `file_path` - 読み込むCSVファイルのパス
|
|
48
|
+
///
|
|
49
|
+
/// # Returns
|
|
50
|
+
/// * `Result<Vec<Vec<String>>, MagnusError>` - パース結果またはエラー
|
|
51
|
+
pub fn read_trim(ruby: &Ruby, file_path: String) -> Result<Vec<Vec<String>>, MagnusError> {
|
|
52
|
+
parse_csv_file(&file_path, csv::Trim::All)
|
|
53
|
+
.map_err(|e| MagnusError::new(ruby.exception_runtime_error(), e.to_string()))
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
#[cfg(test)]
mod tests {
    use super::*;

    /// Two plain rows are parsed and returned in order.
    #[test]
    fn test_parse_basic() {
        let parsed = crate::parser::parse_csv_core("a,b,c\n1,2,3", csv::Trim::None);
        assert!(parsed.is_ok());

        let rows = parsed.unwrap();
        assert_eq!(rows.len(), 2);
        assert_eq!(rows[0], vec!["a", "b", "c"]);
        assert_eq!(rows[1], vec!["1", "2", "3"]);
    }

    /// With `Trim::All`, surrounding whitespace is removed from every field.
    #[test]
    fn test_parse_with_trim_enabled() {
        let padded = " a , b , c \n 1 , 2 , 3 ";
        let parsed = crate::parser::parse_csv_core(padded, csv::Trim::All);
        assert!(parsed.is_ok());

        let rows = parsed.unwrap();
        assert_eq!(rows[0], vec!["a", "b", "c"]);
        assert_eq!(rows[1], vec!["1", "2", "3"]);
    }

    /// With `Trim::None`, surrounding whitespace is preserved.
    #[test]
    fn test_parse_with_trim_disabled() {
        let padded = " a , b , c \n 1 , 2 , 3 ";
        let parsed = crate::parser::parse_csv_core(padded, csv::Trim::None);
        assert!(parsed.is_ok());

        let rows = parsed.unwrap();
        assert_eq!(rows[0], vec![" a ", " b ", " c "]);
        assert_eq!(rows[1], vec![" 1 ", " 2 ", " 3 "]);
    }

    // Note: Ruby API functions that return MagnusError cannot be tested
    // in unit tests because they require a Ruby VM context.
    // File reading functionality is tested in the parser module.
}
|
data/lib/rbcsv/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: rbcsv
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.4
|
|
4
|
+
version: 0.1.7
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- fujitani sora
|
|
@@ -33,6 +33,7 @@ extensions:
|
|
|
33
33
|
extra_rdoc_files: []
|
|
34
34
|
files:
|
|
35
35
|
- ".ruby-version"
|
|
36
|
+
- ".rust-analyzer.json"
|
|
36
37
|
- ".serena/.gitignore"
|
|
37
38
|
- ".serena/memories/code_style_conventions.md"
|
|
38
39
|
- ".serena/memories/project_overview.md"
|
|
@@ -43,13 +44,17 @@ files:
|
|
|
43
44
|
- CODE_OF_CONDUCT.md
|
|
44
45
|
- Cargo.lock
|
|
45
46
|
- Cargo.toml
|
|
47
|
+
- DEVELOPMENT.md
|
|
46
48
|
- LICENSE.txt
|
|
47
49
|
- README.md
|
|
48
50
|
- Rakefile
|
|
49
51
|
- benchmark.rb
|
|
50
52
|
- ext/rbcsv/Cargo.toml
|
|
51
53
|
- ext/rbcsv/extconf.rb
|
|
54
|
+
- ext/rbcsv/src/error.rs
|
|
52
55
|
- ext/rbcsv/src/lib.rs
|
|
56
|
+
- ext/rbcsv/src/parser.rs
|
|
57
|
+
- ext/rbcsv/src/ruby_api.rs
|
|
53
58
|
- lib/rbcsv.rb
|
|
54
59
|
- lib/rbcsv/version.rb
|
|
55
60
|
- output_comparison.rb
|
|
@@ -81,7 +86,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
81
86
|
- !ruby/object:Gem::Version
|
|
82
87
|
version: 3.3.11
|
|
83
88
|
requirements: []
|
|
84
|
-
rubygems_version: 3.
|
|
89
|
+
rubygems_version: 3.7.2
|
|
85
90
|
specification_version: 4
|
|
86
91
|
summary: High-performance CSV processing library with Rust extensions
|
|
87
92
|
test_files: []
|